taxa_list <- c("Quercus", "Cupressaceae", "Ambrosia", "Morus", "Pinaceae", "Ulmus early", "Ulmus late", "Fraxinus", "Betula", "Poaceae early", "Poaceae late", "Acer", "Populus")
taxa_short_list <- str_split(taxa_list, pattern = " ", simplify = T) [, 1]
site_list <- c("NY", "SJ", "AT", "ST", "HT", "TP", "DT", "DV", "KC", "SL")
sitename_list <- c("New York", "San Jose", "Austin", "Seattle", "Houston", "Tampa", "Detroit", "Denver", "Kansas City", "St. Louis")
year_list <- 2018:2021
nab_path <- "/data/ZHULAB/phenology/nab/2021-10-04/"
# read in station coordinates
station_df <- read_csv("/data/ZHULAB/phenology/nab/NAB stations.csv") %>% # this csv is manually typed
mutate(id = row_number())
file_list <- list.files(path = nab_path, pattern = ".xlsx", full.names = T)
nab_df_list <- vector(mode = "list", length = length(file_list))
for (i in 1:length(file_list)) {
file <- file_list[i]
# read in all data
dat <- read_excel(
file,
col_names = F
)
start_n <- which(dat[, 1] == "Date") # excel files have headings of different number of rows. Search for the row(s) that starts with "Date" as the start of data table(s).
# read in meta data
if (min(start_n) == 1) { # Some excel files have no meta data before data table
meta_dat <- NA
station <- NA
} else { # If there are meta data before data table, read it as a vector
meta_dat <- read_excel(
file,
col_names = F,
n_max = start_n[1] - 1
) %>% pull()
station <- meta_dat[2] %>%
strsplit(split = ": ") %>%
unlist() %>%
tail(1) # extract station name from meta data vector
}
location <- file %>%
strsplit(split = c("/")) %>%
unlist() %>%
tail(1) %>%
strsplit(split = " \\(") %>%
unlist() %>%
head(1) # city name extracted from file name
# get coordinates of station
if (!is.na(station)) { # use both city and station name
station_info <- data.frame(station = station, location = location) %>%
stringdist_inner_join(station_df, by = c("location", "station"), max_dist = 20, distance_col = "distance") %>% # join with all entries in station info, measuring dissimilarity in station and location, because their names might be slightly different in both tables
arrange(station.distance, location.distance) %>%
head(1) %>% # there might be several close matches. take the closest.
rename(station = station.y) %>%
rename(location = location.y) %>%
dplyr::select(-station.x, -location.x, -station.distance, -location.distance, -distance)
} else { # use only city name if station name is not available
station_info <- data.frame(location = location) %>%
stringdist_inner_join(station_df, by = "location", max_dist = 20, distance_col = "distance") %>%
arrange(distance) %>%
head(1) %>%
rename(location = location.y) %>%
dplyr::select(-location.x, -distance)
}
# Extract pollen and spore data table
if (length(start_n) == 1) { # only pollen, no spores
colnum <- ncol(read_excel(
file,
skip = start_n[1] - 1
)) # find number of columns
pollen_dat <- read_excel(
file,
skip = start_n[1] - 1,
col_types = c(
"date",
rep("numeric", colnum - 1)
)
)
if (is.na(pollen_dat[2, 1])) { # meaning finer taxonomic resolutions available
genus_names <- read_excel(
file,
skip = start_n[1] - 1
) %>%
slice(1) %>% # read the first row with finer taxonomic resolution
gather(key = "old", value = "new") %>% # coarse and fine taxonomy into long format
mutate(new = case_when(
is.na(new) ~ old,
TRUE ~ new
)) # sometimes only coarse taxonomy is present, then keep that
pollen_dat <- read_excel(
file,
skip = start_n[1] - 1,
col_types = c(
"date",
rep("numeric", colnum - 1)
),
col_names = genus_names$new # change to new colnames with finer taxonomic resolution
)
}
pollen_dat <- pollen_dat %>%
filter(!is.na(Date)) %>% # filter out rows that are not count data
gather(key = "taxa", value = "count", -Date) %>% # to long format
filter(!str_detect(taxa, "Station")) %>% # filter out additional info that are not count data, like "Station Name", "Station Postal Code", "Station State"
filter(!taxa %in% c("Comment", "WeatherNotes")) %>%
mutate(group = "pollen") %>%
rowwise() %>%
group_by(Date, taxa, group) %>%
summarize(count = sum(count)) %>%
ungroup()
# combine with station info
nab_df_list[[i]] <- pollen_dat %>%
cbind(station_info)
}
if (length(start_n) == 2) { # both pollen and spores
colnum_pollen <- ncol(read_excel(
file,
skip = start_n[1] - 1,
n_max = (start_n[2] - 2) - (start_n[1] + 1)
))
pollen_dat <- read_excel(
file,
skip = start_n[1] - 1,
n_max = (start_n[2] - 2) - (start_n[1] + 1),
col_types = c(
"date",
rep("numeric", colnum_pollen - 1)
)
)
if (is.na(pollen_dat[2, 1])) { # meaning finer taxonomic resolutions available
genus_names <- read_excel(
file,
skip = start_n[1] - 1,
n_max = (start_n[2] - 2) - (start_n[1] + 1)
) %>%
slice(1) %>%
gather(key = "old", value = "new") %>%
mutate(new = case_when(
is.na(new) ~ old,
TRUE ~ new
))
pollen_dat <- read_excel(
file,
skip = start_n[1] - 1,
n_max = (start_n[2] - 2) - (start_n[1] + 1),
col_types = c(
"date",
rep("numeric", colnum_pollen - 1)
),
col_names = genus_names$new
)
}
pollen_dat <- pollen_dat %>%
filter(!is.na(Date)) %>%
gather(key = "taxa", value = "count", -Date) %>%
filter(!str_detect(taxa, "Station")) %>%
filter(!taxa %in% c("Comment", "WeatherNotes")) %>%
mutate(group = "pollen") %>%
rowwise() %>%
group_by(Date, taxa, group) %>%
summarize(count = sum(count)) %>%
ungroup()
colname_spore <- ncol(read_excel(
file,
skip = start_n[2] - 1
))
spore_dat <- read_excel(
file,
skip = start_n[2] - 1,
col_types = c(
"date",
rep("numeric", colname_spore - 1)
)
) %>%
filter(!is.na(Date)) %>%
gather(key = "taxa", value = "count", -Date) %>%
filter(!str_detect(taxa, "Station")) %>%
filter(!taxa %in% c("Comment", "WeatherNotes")) %>%
mutate(group = "spore")
if (nrow(spore_dat) != 0) {
nab_df_list[[i]] <- bind_rows(pollen_dat, spore_dat) %>%
cbind(station_info)
} else {
nab_df_list[[i]] <- pollen_dat %>%
cbind(station_info)
}
}
print(i)
}
nab_df <- bind_rows(nab_df_list)
write_rds(nab_df, file = "/data/ZHULAB/phenology/nab/nab_dat.rds")
nab_df <- read_rds("/data/ZHULAB/phenology/nab/nab_dat.rds")
## get all distinct taxa and clean up their names
nab_taxa_df <- nab_df %>%
distinct(taxa) %>%
filter(!taxa %in% c("Total Pollen Count", "Total Spore Count")) %>%
rename(taxa_raw = taxa) %>%
rowwise() %>%
mutate(taxa_clean = str_split(taxa_raw, pattern = "/", simplify = T)[1]) %>% # some taxa have alternative names, e.g., Chenopodiaceae/Amaranthaceae
mutate(taxa_clean = str_split(taxa_clean, pattern = " \\(", simplify = T)[1]) %>% # e.g., Asteraceae (Excluding Ambrosia and Artemisia)
mutate(taxa_clean = str_replace(taxa_clean, pattern = "-type", "")) %>% # e.g., Leptosphaeria-type
mutate(taxa_clean = str_replace(taxa_clean, pattern = "Unidentified ", "")) %>% # e.g., Unidentified Fungi
mutate(taxa_clean = str_replace(taxa_clean, pattern = "Undifferentiated ", "")) %>% # e.g., Undifferentiated Ascospores
mutate(taxa_clean = str_replace(taxa_clean, pattern = "Other ", "")) %>% # e.g., Other Grass Pollen
mutate(taxa_clean = str_replace(taxa_clean, pattern = "spores", "mycota")) %>% # e.g., Undifferentiated Ascospores
mutate(taxa_clean = str_replace(taxa_clean, pattern = " ", "")) %>% # remove space
mutate(taxa_clean = case_when(
taxa_clean == "Rusts" ~ "Pucciniales",
taxa_clean == "Smuts" ~ "Myxomycetes",
taxa_clean == "Dreshslera" ~ "Helminthosporium",
(taxa_clean == "GrassPollen" | taxa_clean == "WeedPollen") ~ "Gramineae",
taxa_clean == "TreePollen" ~ "Tracheophyta",
taxa_clean == "Pollen" ~ "Viridiplantae",
TRUE ~ taxa_clean
)) %>% # manual cleaning
arrange(taxa_clean)
## Use taxize to resolve taxonomy
# Find sources id
# gnr_datasources()
# match with names in databases
resolve_df <- nab_taxa_df %>%
pull(taxa_clean) %>%
gnr_resolve(data_source_ids = c(4, 11), with_context = T, best_match_only = T, fields = "all") %>% # NCBI and GBIF databases
full_join(nab_taxa_df,
by = c("user_supplied_name" = "taxa_clean")
) %>%
rename(taxa_clean = user_supplied_name) %>%
mutate(same = (taxa_clean == matched_name)) # check if all taxa names are valid
# some taxa are incorrectly identified as Metazoa. Resolve again for those.
resolve_df_correct <- resolve_df %>%
filter(str_detect(classification_path, "Metazoa")) %>%
pull(taxa_clean) %>%
gnr_resolve(data_source_ids = c(4, 11), best_match_only = F, fields = "all") %>% # NCBI and GBIF. Keep all matches here, not only the best match.
filter(!str_detect(classification_path, "Animalia")) %>%
filter(!str_detect(classification_path, "Metazoa")) %>%
rename(taxa_clean = user_supplied_name) %>%
group_by(taxa_clean) %>%
slice(1) %>%
ungroup()
# Correct previously resolved taxonomy
resolve_df <- resolve_df %>%
filter(!str_detect(classification_path, "Metazoa")) %>%
bind_rows(resolve_df_correct) %>%
arrange(taxa_clean)
# make sure all taxa are resolved
# resolve_df %>% filter(!same) %>% View()
# get full classification
taxa_id_df <- resolve_df %>%
dplyr::select(taxa_clean, data_source_id, taxon_id) %>%
distinct(taxa_clean, .keep_all = T) %>%
mutate(data_source = case_when(
data_source_id == 4 ~ "ncbi",
data_source_id == 11 ~ "gbif"
))
taxa_class_df <- vector(mode = "list", length = nrow(taxa_id_df))
for (i in 1:nrow(taxa_id_df)) {
taxa_class_df[[i]] <-
classification(taxa_id_df$taxon_id[i], db = taxa_id_df$data_source[i]) [[1]] %>%
as_tibble() %>%
filter(rank %in% c("kingdom", "phylum", "class", "order", "family", "genus", "species")) %>%
mutate(rank = factor(rank, levels = c("kingdom", "phylum", "class", "order", "family", "genus", "species"))) %>%
dplyr::select(-id) %>%
spread(key = "rank", value = "name") %>%
mutate(taxa_clean = taxa_id_df$taxa_clean[i])
}
taxa_class_df <- bind_rows(taxa_class_df)
nab_taxa_df <- nab_taxa_df %>%
left_join(taxa_class_df, by = "taxa_clean") %>%
mutate(kingdom = str_replace(kingdom, "Plantae", "Viridiplantae")) %>% # manual correction
mutate(kingdom = case_when(
phylum == "Oomycota" ~ "Chromista",
TRUE ~ kingdom
))
write_rds(nab_taxa_df, "/data/ZHULAB/phenology/nab/nab_taxa.rds")
Combine nab data and taxa information. Some preprocessing.
nab_df <- read_rds("/data/ZHULAB/phenology/nab/nab_dat.rds")
nab_taxa_df <- read_rds("/data/ZHULAB/phenology/nab/nab_taxa.rds")
nab_with_taxa_df <- nab_df %>%
rename(taxa_raw = taxa) %>%
left_join(nab_taxa_df, by = "taxa_raw") %>%
rename(taxa = taxa_clean) %>%
mutate(family = case_when(
taxa_raw == "Total Pollen Count" ~ "Total",
TRUE ~ family
)) %>%
mutate(genus = case_when(
taxa_raw == "Total Pollen Count" ~ "Total",
TRUE ~ genus
)) %>%
filter(kingdom == "Viridiplantae" | is.na(kingdom)) %>%
group_by(Date, lat, lon, station,location, id, family, genus, taxa) %>%
summarise(count = sum(count)) %>%
ungroup() %>%
mutate(date = as.Date(Date)) %>%
dplyr::select(-Date)
Focus on ten cities. * Exceptions: Denver pollen data are from Colorado Springs; Austin pollen data are from Georgetown; Detroit pollen data are from Sylvania.
# summarize station info
meta_df <- nab_with_taxa_df %>%
filter(taxa %in% taxa_short_list) %>% # limit to taxa studied
drop_na(count) %>%
group_by(station, location, lat, lon, id) %>%
summarise(
mindate = min(date),
maxdate = max(date),
n = n()
) %>%
mutate(range = maxdate - mindate) %>%
ungroup() %>%
arrange(desc(n)) %>%
mutate(site = NA)
# match NAB stations with cities studied
meta_df$site[meta_df$id == 26] <- "NY"
meta_df$site[meta_df$id == 7] <- "SJ"
meta_df$site[meta_df$id == 5] <- "AT"
meta_df$site[meta_df$id == 33] <- "ST"
meta_df$site[meta_df$id == 2] <- "HT"
meta_df$site[meta_df$id == 48] <- "TP"
meta_df$site[meta_df$id == 12] <- "DV"
meta_df$site[meta_df$id == 47] <- "DT"
meta_df$site[meta_df$id == 17] <- "KC"
meta_df$site[meta_df$id == 37] <- "SL"
meta_df <- meta_df %>%
left_join(data.frame(site = site_list, sitename = sitename_list), by = "site")
# make map
p_pollen_map <- ggplot() +
geom_polygon(data = map_data("state"), aes(x = long, y = lat, group = group), fill = "white") +
geom_path(data = map_data("state"), aes(x = long, y = lat, group = group), color = "grey50", alpha = 0.5, lwd = 0.2) +
theme_void() +
geom_point(data = meta_df, aes(x = lon, y = lat), pch = 10, color = "black", cex = 3) +
geom_label_repel(data = meta_df %>% filter(site %in% site_list), aes(x = lon, y = lat, label = sitename)) +
geom_point(data = meta_df %>% filter(site %in% site_list), aes(x = lon, y = lat), pch = 10, color = "red", cex = 3) +
coord_equal()
p_pollen_map
View pollen phenology in study sites.
nab_with_taxa_df %>%
left_join(meta_df %>% dplyr::select(id, site, sitename), by = "id") %>%
filter(site %in% site_list) %>%
filter(taxa %in% taxa_short_list) %>%
mutate(doy = format(date, "%j") %>% as.integer()) %>%
mutate(year = format(date, "%Y") %>% as.integer()) %>%
# filter(year %in% year_list) %>%
group_by(taxa, site) %>%
mutate(count=count %>% sqrt()) %>%
mutate(count_st=(count-min(count, na.rm = T))/(max(count, na.rm = T)-min(count, na.rm = T))) %>%
ggplot() +
geom_point(aes(x = doy, y = count_st, group = taxa, col = taxa), alpha=0.3) +
facet_grid(cols = vars(taxa), rows = vars(sitename), scales = "free_y") +
theme_classic()
## Warning in sqrt(.): NaNs produced
## Warning: Removed 7494 rows containing missing values (geom_point).
Read in and clean data. Each tree inventory has a different format, so they have to be processed differently.
cl <- makeCluster(length(site_list), outfile = "")
registerDoSNOW(cl)
trees_df_list <- foreach(
site = site_list,
.packages = c("tidyverse", "rgdal")
) %dopar% {
if (site == "KC" | site == "SL") {
# Data from urban FIA
# Download https://experience.arcgis.com/experience/3641cea45d614ab88791aef54f3a1849/page/Urban-Datamart/
# Manual: https://www.fia.fs.fed.us/library/database-documentation/urban/dbDescription/Urban_FIADB_User_Guides_Database_Description_ver3-0_2021_03_17.pdf
# tree table
id_tree_df<-read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/UrbanFIA/ID_TREE.csv") %>%
filter(statecd == 29) %>% # Missouri
filter(countycd %in% case_when(site == "KC" ~ 95, site == "SL" ~ c(189, 510))) %>%
dplyr::select(plotid, id = cn, spcd, statuscd) %>%
group_by(id) %>%
filter(sum(statuscd != 1) == 0) %>% # living trees
ungroup() %>%
distinct(id, .keep_all = T) %>% # in case one tree is surveyed repeatedly
dplyr::select(-statuscd)
# plot table
id_plot_df<-read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/UrbanFIA/ID_PLOT.csv") %>%
dplyr::select(plotid, lat, lon)
# species reference table
ref_sp_df<-read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/UrbanFIA/REF_SPECIES.csv") %>%
dplyr::select(spcd, genus, species) %>%
mutate(species = paste(genus, species))
# join tables
trees_df <- id_tree_df %>%
left_join(id_plot_df, by = "plotid") %>%
left_join(ref_sp_df, by = "spcd") %>%
dplyr::select(-plotid, -spcd) %>%
mutate(site = site)
}
if (site == "DT") {
# Data from Dan Katz
trees_df <- read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/Detroit_oak_pheno_obs_spring_2017.csv") %>%
distinct(id = tree, species = Species, x, y)
# reproject to WGS84
pts <- SpatialPoints(trees_df[, c("x", "y")],
proj4string = CRS("+init=EPSG:3857")
)
# +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs
pts_reproj <- spTransform(
pts,
CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
)
trees_df <- cbind(trees_df %>% dplyr::select(-x, -y), coordinates(pts_reproj)) %>%
rename(lat = y, lon = x) %>%
as_tibble() %>%
mutate(site = site)
}
if (site == "DV") {
# Data from OpenTrees.org
trees_df<-read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/Tree_Inventory_Denver.csv") %>%
dplyr::select(id = SITE_ID, species = SPECIES_BO, time = INVENTORY_DATE, lat = Y_LAT, lon = X_LONG) %>%
arrange(desc(time)) %>%
distinct(id, species, .keep_all = T) %>%
dplyr::select(-time) %>%
filter(lon != 0) %>%
mutate(site = site)
}
if (site == "TP") {
# Data from https://www.opentreemap.org/tampa/map/
trees_df <- read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/Tree_Inventory_Tampa.csv") %>%
dplyr::select(id = `Tree Id`, genus = Genus, species = Species, lat = `Point Y`, lon = `Point X`) %>% # checked that there is no repeated tree id
mutate(species = paste0(genus, " ", species)) %>%
dplyr::select(-genus) %>%
mutate(site = site)
}
if (site == "HT") {
# Data from https://koordinates.com/layer/25245-houston-texas-street-tree-inventory/data/
trees_df <- read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/Tree_Inventory_Houston/houston-texas-street-tree-inventory.csv") %>%
dplyr::select(species_common = Species, X = Shape_X, Y = Shape_Y) %>%
rowwise() %>%
mutate(common = case_when(
str_detect(species_common, ", spp.") ~ str_replace(species_common, ", spp.", ""), # for coarse common names, e.g., "Oak, spp."
(str_detect(species_common, ", ") & !str_detect(species_common, ", spp.")) ~ paste0(str_split(species_common, ", ")[[1]][2], " ", str_split(species_common, ", ")[[1]][1]), # reverse order of common name, e.g., "Oak, Water"
TRUE ~ species_common
)) %>%
ungroup() %>%
distinct(X, Y, .keep_all = T) %>% # in case same tree is sampled repeatedly
mutate(id = row_number())
# reproject to WGS84
pts <- SpatialPoints(trees_df[, c("X", "Y")],
proj4string = CRS("+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs")
)
pts_reproj <- spTransform(
pts,
CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
)
trees_df <- cbind(trees_df %>% dplyr::select(-X, -Y), coordinates(pts_reproj)) %>%
rename(lat = Y, lon = X)
# Use taxize to match common name with scientific name
if (!file.exists("/data/ZHULAB/phenology/occurrence/StreetTrees/Tree_Inventory_Houston/species_reference.csv")) {
id2comm <- function(pageid) {
common_matches <- eol_pages(taxonconceptID = pageid, common_names = TRUE)$vernacular
if (is.null(common_matches)) {
common_match <- ""
} else {
common_matches <- common_matches %>% filter(language == "en")
if (nrow(common_matches) > 0) {
common_match <- common_matches %>%
pull(vernacularname) %>%
table() %>%
sort(decreasing = TRUE) %>%
head(1) %>%
names()
}
}
return(common_match)
}
comm2sci_new <- function(common) {
species <- comm2sci(common, simplify = T)[[1]]
if (length(species) == 0) {
res <- comm2sci(common, db = "eol", simplify = F)[[1]]
if (length(res) == 0) {
species <- NA
} else {
species <- res %>%
as_tibble() %>%
rowwise() %>%
mutate(common_match = id2comm(pageid)) %>%
ungroup() %>%
stringdist_inner_join(data.frame(common_name = common), by = c("common_match" = "common_name"), max_dist = 20, distance_col = "distance") %>%
arrange(distance, pageid) %>%
head(1) %>%
filter(common_match != "") %>%
pull(name)
if (length(species) == 0) {
species <- NA
}
}
print(paste(common, species))
}
if (is.na(species)) {
species <- readline(prompt = paste0(species_df$common[i], ". Manually enter scientific name: "))
}
return(species)
}
species_df <- trees_df %>%
distinct(common) %>%
rowwise() %>%
mutate(species = comm2sci_new(common))
write_csv(species_df, "/data/ZHULAB/phenology/occurrence/StreetTrees/Tree_Inventory_Houston/species_reference.csv")
} else {
species_df <- read_csv("/data/ZHULAB/phenology/StreetTrees/Tree_Inventory_Houston/species_reference.csv")
}
trees_df <- trees_df %>%
left_join(species_df, by = "common") %>%
dplyr::select(-species_common, -common) %>%
mutate(site = site)
}
if (site == "NY") {
trees_df <- read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/Tree_Inventory_NewYork.csv") %>%
dplyr::select(id = tree_id, species = spc_latin, lat = latitude, lon = longitude) %>% # checked that there is no repeated tree id
mutate(site = site)
}
if (site == "AT") {
trees_df <- read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/Tree_Inventory_Austin/Tree_Inventory_Austin.csv") %>%
dplyr::select(id = OBJECTID, species = SPECIES, coordinates = the_geom) %>% # checked that there is no repeated tree id
mutate(coordinates = str_replace(coordinates, "POINT \\(", "")) %>%
mutate(coordinates = str_replace(coordinates, "\\)", "")) %>%
rowwise() %>%
mutate(
lon = str_split(coordinates, pattern = " ", simplify = T)[1],
lat = str_split(coordinates, pattern = " ", simplify = T)[2]
) %>%
ungroup() %>%
mutate(
lon = as.numeric(lon),
lat = as.numeric(lat)
) %>%
dplyr::select(-coordinates) %>%
mutate(site = site)
}
if (site == "SJ") {
trees_df <- read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/Tree_Inventory_SanJose/Tree_Inventory_SanJose.csv") %>%
dplyr::select(id = OBJECTID, species = NAMESCIENTIFIC, Y = Y, X = X) # checked that there is no repeated tree id
# shape <- readOGR(dsn = "/data/ZHULAB/phenology/occurrence/StreetTrees/Tree_Inventory_SanJose/Street_Tree.shp")
# proj4string(shape)
projection_sj <- "+proj=lcc +lat_0=36.5 +lon_0=-120.5 +lat_1=38.4333333333333 +lat_2=37.0666666666667 +x_0=2000000.0001016 +y_0=500000.0001016 +datum=NAD83 +units=us-ft +no_defs"
pts <- SpatialPoints(trees_df[, c("X", "Y")],
proj4string = CRS(projection_sj)
)
pts_reproj <- spTransform(
pts,
CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
)
trees_df <- cbind(trees_df %>% dplyr::select(-X, -Y), coordinates(pts_reproj)) %>%
rename(lat = Y, lon = X) %>%
mutate(site = site)
}
if (site == "ST") {
trees_df <- read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/Tree_Inventory_Seattle.csv") %>%
dplyr::select(id = OBJECTID, species = SCIENTIFIC_NAME, lat = Y, lon = X) %>% # checked that there is no repeated tree id
mutate(site = site)
}
trees_df
}
stopCluster(cl)
trees_df <- bind_rows(trees_df_list) %>%
rowwise() %>%
mutate(genus = str_split(species, pattern = " ", simplify = T)[1]) %>% # get genus name from species name
ungroup() %>%
distinct(id, site, .keep_all = T) %>% # in case one tree is surveyed repeatedly
mutate(genus_id = as.integer(as.factor(genus)))
Find family names from genus names. This step needs supervision.
genus_to_family<-trees_df %>%
distinct(genus) %>%
drop_na() %>%
mutate(family=tax_name(sci = genus, get = "family", messages = F, db="ncbi")$family)
write_rds(genus_to_family, "/data/ZHULAB/phenology/occurrence/genus_to_family.rds")
Join with inventory data and save.
genus_to_family<-read_rds("/data/ZHULAB/phenology/occurrence/genus_to_family.rds")
trees_df<-trees_df %>%
left_join(genus_to_family, by="genus")
write_rds(trees_df, "/data/ZHULAB/phenology/occurrence/street_trees.rds")
# Land cover data manually downloaded from Sentinel-2 Land Use/ Land Cover Downloader
# https://www.arcgis.com/apps/instant/media/index.html?appid=fc92d38533d440078f17678ebc20e8e2
grass_path <- "/data/ZHULAB/phenology/occurrence/LULC/"
trees_df <- read_rds("/data/ZHULAB/phenology/occurrence/street_trees.rds")
cl <- makeCluster(length(site_list), outfile = "")
registerDoSNOW(cl)
grass_df_city_list <- foreach(
siteoi = site_list,
.packages = c("tidyverse", "raster")
) %dopar% {
# get extent from tree inventory
trees_df_city <- trees_df %>%
filter(site == siteoi) %>%
drop_na(lon, lat)
bbox <- extent(min(trees_df_city$lon), max(trees_df_city$lon), min(trees_df_city$lat), max(trees_df_city$lat))
grass_df_city_year_list <- vector(mode = "list")
for (yearoi in year_list) {
# get file name(s) for each site and year
files <- list.files(paste0(grass_path, siteoi), pattern = paste0(yearoi, "0101-"), full.names = T)
# read raster(s)
ras <- raster(files)
# crop to city
bbox_sp <- as(bbox, "SpatialPolygons")
projection(bbox_sp) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
bbox_reproj <- spTransform(bbox_sp, proj4string(ras))
ras_cr <- crop(ras, bbox_reproj)
# get coordinates of grass
grass_df_year <- as.data.frame(ras_cr, xy = T) %>%
`colnames<-`(c("x", "y", "class")) %>%
filter(class == 11) %>%
dplyr::select(-class) %>%
mutate(year = yearoi)
grass_df_city_year_list[[yearoi %>% as.character()]] <- grass_df_year
print(paste0(siteoi, yearoi))
}
grass_df_city <- bind_rows(grass_df_city_year_list) %>%
mutate(grass = 1) %>% # choose pixels that are grass in all years
spread(key = "year", value = "grass") %>%
drop_na() %>%
dplyr::select(x, y) %>%
sample_n(min(10000, nrow(.))) # subset when there are too many grass pixels
# reproject
grass_sp_reproj <- SpatialPoints(grass_df_city[, c("x", "y")],
proj4string = CRS(proj4string(ras))
)
grass_sp <- spTransform(
grass_sp_reproj,
CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
)
# get coordinates
grass_df_city <- coordinates(grass_sp) %>%
as_tibble() %>%
`colnames<-`(c("lon", "lat")) %>%
mutate(site = siteoi) %>%
mutate(id = row_number()) %>%
mutate(family = "Poaceae") %>%
mutate(genus = "Unknown") %>%
mutate(genus_id = 999)
grass_df_city
}
stopCluster(cl)
grass_df <- bind_rows(grass_df_city_list)
write_rds(grass_df, "/data/ZHULAB/phenology/occurrence/grass.rds")
Get ragweed occurrence information from GBIF.
trees_df <- read_rds("/data/ZHULAB/phenology/occurrence/street_trees.rds")
cl <- makeCluster(length(site_list), outfile = "")
registerDoSNOW(cl)
ragweed_df_city_list <- foreach(
siteoi = site_list,
.packages = c("tidyverse", "raster", "sf", "spocc")
) %dopar% {
# get extent
trees_df_city <- trees_df %>%
filter(site == siteoi) %>%
drop_na(lon, lat)
bbox <- extent(min(trees_df_city$lon), max(trees_df_city$lon), min(trees_df_city$lat), max(trees_df_city$lat))
# make it a polygon
bbox_sp <- as(bbox, "SpatialPolygons")
projection(bbox_sp) <- "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
# get gbif data
res <- occ(
query = "Ambrosia", from = "gbif", has_coords = TRUE, limit = 1e6,
geometry = st_bbox(bbox_sp),
date = c(as.Date("2018-01-01"), as.Date("2021-12-31")),
gbifopts = list(
hasGeospatialIssue = FALSE
)
)
# get coordinates
ragweed_df_city <- res$gbif$data[[1]] %>%
dplyr::select(lon = longitude, lat = latitude, species) %>%
mutate(site = siteoi) %>%
mutate(family = "Asteraceae") %>%
mutate(genus = "Ambrosia") %>%
mutate(genus_id = 998)
ragweed_df_city
}
stopCluster(cl)
ragweed_df <- bind_rows(ragweed_df_city_list)
write_rds(ragweed_df, "/data/ZHULAB/phenology/occurrence/ragweed.rds")
Put tree, grass, and ragweed data in one data frame.
trees_df <- read_rds("/data/ZHULAB/phenology/occurrence/street_trees.rds")
grass_df <- read_rds("/data/ZHULAB/phenology/occurrence/grass.rds")
ragweed_df <- read_rds("/data/ZHULAB/phenology/occurrence/ragweed.rds")
plant_df <- bind_rows(trees_df, grass_df, ragweed_df)
Map relative position of tree inventory and nab station.
ggplot() +
geom_polygon(data = map_data("state"), aes(x = long, y = lat, group = group), fill = "white") +
geom_path(data = map_data("state"), aes(x = long, y = lat, group = group), color = "grey50", alpha = 0.5, lwd = 0.2) +
theme_void() +
geom_point(
data = plant_df %>%
filter(genus %in% taxa_short_list | family %in% taxa_short_list) %>%
mutate(taxa = case_when(
family %in% c("Poaceae", "Cupressaceae", "Pinaceae") ~ family,
TRUE ~ genus
)) %>%
# filter(!taxa%in% c("Poaceae", "Ambrosia")) %>%
group_by(site) %>%
sample_n(100) %>%
ungroup(),
aes(x = lon, y = lat), col = "dark blue", alpha = 0.5
) +
geom_point(data = meta_df %>% filter(site %in% site_list), aes(x = lon, y = lat), cex = 3, pch = 10, col = "red", alpha = 1) +
# geom_label_repel(data=meta_df %>% filter(site %in% site_list), aes(x=lon, y=lat, label = sitename))+
coord_equal()
Calculate distance from plants to NAB stations in the unit of km.
distance_df<-plant_df %>%
filter(genus %in% taxa_short_list | family %in% taxa_short_list) %>%
left_join(meta_df %>% dplyr::select (site, sitelon=lon, sitelat=lat, sitename) %>% drop_na(), by="site") %>%
rowwise() %>%
mutate(distance=distm(c(lon, lat), c(sitelon, sitelat), fun = distHaversine) %>% as.numeric() %>% `/`(1000)) %>% # distance in the unit of km
ungroup() %>%
group_by(site, sitename) %>%
summarise(mindist=min(distance),
maxdist=max(distance),
meandist=mean(distance))
write_rds(distance_df, "/data/ZHULAB/phenology/occurrence/distance_from_plants_to_nab_stations.rds")
read_rds("/data/ZHULAB/phenology/occurrence/distance_from_plants_to_nab_stations.rds")
## # A tibble: 10 x 5
## # Groups: site [10]
## site sitename mindist maxdist meandist
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 AT Austin 37.1 40.8 38.8
## 2 DT Detroit 83.1 98.1 92.2
## 3 DV Denver 67.0 134. 96.4
## 4 HT Houston 13.9 38.5 24.8
## 5 KC Kansas City 0.505 41.9 25.1
## 6 NY New York 0.0904 37.7 16.5
## 7 SJ San Jose 0.128 25.0 13.1
## 8 SL St. Louis 1.27 40.3 20.0
## 9 ST Seattle 0.0925 23.2 9.49
## 10 TP Tampa 0.00420 45.4 12.1
Map plant occurrence in each city.
ggplot() +
theme_classic() +
geom_point(
data = plant_df %>%
filter(genus %in% taxa_short_list | family %in% taxa_short_list) %>%
mutate(taxa = case_when(
family %in% c("Poaceae", "Cupressaceae", "Pinaceae") ~ family,
TRUE ~ genus
)) %>%
# filter(!taxa%in% c("Poaceae", "Ambrosia")) %>%
group_by(taxa, site) %>%
sample_n(min(100, n())) %>%
ungroup() %>%
left_join(meta_df %>% dplyr::select(site, sitename), by = "site"),
aes(x = lon, y = lat, col = taxa), alpha = 0.3
) +
facet_wrap(. ~ sitename, scales = "free")
phenophases <- npn_phenophases()
species_list <- npn_species()
# download all NPN data for taxa studied
npn_path <- "/data/ZHULAB/phenology/NPN/"
cl <- makeCluster(length(site_list), outfile = "")
registerDoSNOW(cl)
npn_df_list <- vector(mode = "list")
for (taxaoi_short in taxa_short_list %>% unique()) {
if (!file.exists(paste0(npn_path, taxaoi_short, ".rds"))) {
spid <- species_list %>%
filter(genus == taxaoi_short | family_name == taxaoi_short) %>%
pull(species_id)
npn_data <- npn_download_status_data(request_source = "YS", years = c(2000:2022), species_id = spid)
write_rds(npn_data, paste0(npn_path, taxaoi_short, ".rds"))
} else {
npn_data <- read_rds(paste0(npn_path, taxaoi_short, ".rds"))
}
npn_taxa_df_list <- foreach(
siteoi = site_list,
.packages = c("tidyverse", "geosphere")
) %dopar% {
lat_oi <- meta_df %>%
filter(site == siteoi) %>%
pull(lat) %>%
mean()
lon_oi <- meta_df %>%
filter(site == siteoi) %>%
pull(lon) %>%
mean()
npn_flower_df <- npn_data %>%
filter(phenophase_status == 1) %>%
filter(phenophase_description %in% c("Full pollen release (conifers)", "Pollen release (conifers)", "Pollen cones (conifers)", "Open pollen cones (conifers)", "Full flowering (50%)", "Flowers or flower buds", "Pollen release (flowers)"))
if (nrow(npn_flower_df > 0)) {
npn_flower_df <- npn_flower_df %>%
rowwise() %>%
mutate(distance = distm(c(longitude, latitude), c(lon_oi, lat_oi), fun = distHaversine) %>% as.numeric()) %>% # distance in the unit of m
ungroup() %>%
filter(distance <= 500000) %>% # within 500 km of the NAB station
dplyr::select(date = observation_date, lon = longitude, lat = latitude, doy = day_of_year) %>%
mutate(date = as.Date(date)) %>%
mutate(site = siteoi)
} else {
npn_flower_df <- data.frame(lon = numeric(0), lat = numeric(0), doy = integer(0), date = character(0), site = character(0)) %>% mutate(date = as.Date(date))
}
npn_count_df <- npn_flower_df %>%
group_by(site, date) %>%
summarise(count = n()) %>%
ungroup()
print(paste0(taxaoi_short, ", ", siteoi))
npn_count_df
}
npn_df_list[[taxaoi_short]] <- bind_rows(npn_taxa_df_list) %>%
mutate(taxa = taxaoi_short)
}
stopCluster(cl)
npn_df <- bind_rows(npn_df_list)
write_rds(npn_df, paste0(npn_path, "npn_dat.rds"))
View flowering phenology (from NPN data) in study sites.
npn_path <- "/data/ZHULAB/phenology/NPN/"
npn_df_all<-read_rds(paste0(npn_path, "npn_dat.rds"))
npn_df_all %>%
left_join(meta_df %>% dplyr::select(id, site, sitename), by = "site") %>%
filter(site %in% site_list) %>%
filter(taxa %in% taxa_short_list) %>%
mutate(doy = format(date, "%j") %>% as.integer()) %>%
mutate(year = format(date, "%Y") %>% as.integer()) %>%
# filter(year %in% year_list) %>%
group_by(taxa, site) %>%
mutate(count_st=(count-min(count, na.rm = T))/(max(count, na.rm = T)-min(count, na.rm = T))) %>%
ggplot() +
geom_point(aes(x = doy, y = count_st, group = taxa, col = taxa), alpha=0.3) +
facet_grid(cols = vars(taxa), rows = vars(sitename), scales = "free_y") +
theme_classic()
## Warning: Removed 19 rows containing missing values (geom_point).
# Downloaded from: https://chelsa-climate.org/bioclim/
# Documentation: https://chelsa-climate.org/wp-admin/download-page/CHELSA_tech_specification_V2.pdf
chelsa_path <- "/data/ZHULAB/phenology/CHELSA/"
# read in raster
tmean_ras <- raster(paste0(chelsa_path, "bio1.tif"))
ppt_ras <- raster(paste0(chelsa_path, "bio12.tif"))
vpdmax_ras <- raster(paste0(chelsa_path, "vpd_max.tif"))
# sites as points
meta_sf <- meta_df %>%
drop_na(site) %>%
dplyr::select(site, lon, lat) %>%
st_as_sf(
coords = c("lon", "lat"),
crs = 4326
)
# extract chelsa data at points
chelsa_df <- meta_sf %>%
mutate(
mat = raster::extract(tmean_ras, meta_sf),
tap = raster::extract(ppt_ras, meta_sf),
vpd = raster::extract(vpdmax_ras, meta_sf)
) %>%
as_tibble() %>%
dplyr::select(-geometry)
User settings.
# Set API
# api_key = "REMOVED" #ysong67
api_key <- "REMOVED" # xcui12
# Metadata filters
cloud_lim <- 0.99 # percent from 0-1
item_name <- "PSScene4Band"
# PSOrthoTile, PSScene3Band, PSScene4Band, Sentinel2L1C
# (see https://developers.planet.com/docs/data/items-assets/)
product <- "analytic_sr"
# analytic_b1, analytic_b2
# (see https://developers.planet.com/docs/data/items-assets/)
ps_path <- "/data/ZHULAB/phenology/PlanetScope/"
Order data. Orders may fail. May have to order again in that case.
for (siteoi in site_list) {
ps_path_site <- paste0(ps_path, siteoi, "/")
dir.create(ps_path_site, recursive = T)
dir.create(paste0(ps_path_site, "orders/"), recursive = T)
# Set AOI (many ways to set this!) ultimately just need an extent()
plant_df_site <- plant_df %>%
filter(site == siteoi) %>%
drop_na(lon, lat)
bbox <- extent(min(plant_df_site$lon), max(plant_df_site$lon), min(plant_df_site$lat), max(plant_df_site$lat))
for (year_download in 2017:2022) {
order_df <- data.frame(year = integer(0), month = integer(0), id = character(0))
if (year_download == 2022) {
month_end <- 4
} else {
month_end <- 12
}
for (month_download in 1:month_end) {
# Date range of interest
start_year <- year_download
end_year <- year_download
date_start <- lubridate::floor_date(as.Date(paste0(year_download, "-", str_pad(month_download, 2, pad = "0"), "-01")), unit = "month")
date_end <- lubridate::ceiling_date(as.Date(paste0(year_download, "-", str_pad(month_download, 2, pad = "0"), "-01")), unit = "month") - 1
start_doy <- as.numeric(format(date_start, "%j"))
end_doy <- as.numeric(format(date_end, "%j"))
# Create order name
order_name <- paste(siteoi, item_name, product, start_year, end_year, start_doy, end_doy, sep = "_")
# Planet Orders API
order_id <- planetR:::planet_order_request(
api_key = api_key,
bbox = bbox,
date_start = date_start,
date_end = date_end,
start_doy = start_doy,
end_doy = end_doy,
cloud_lim = cloud_lim,
item_name = item_name,
product = product,
order_name = order_name
)
# when there are too many items in an order, order id will not be returned. split order into a few groups in that case.
if (!is.null(order_id)) {
# store order name
order_df <- order_df %>%
bind_rows(data.frame(year = year_download, month = month_download, id = order_id))
} else {
item_num <- nrow(planet_search(
bbox, start_doy, end_doy, date_end,
date_start, cloud_lim, item_name, api_key
))
group_num <- ceiling(item_num / 500)
split_num <- ceiling(30 / group_num)
doy_group <- split(seq(start_doy, end_doy), floor(seq(1:(end_doy - start_doy + 1)) / split_num))
for (g in 1:length(doy_group)) {
order_id <- planetR:::planet_order_request(
api_key = api_key,
bbox = bbox,
date_start = date_start,
date_end = date_end,
start_doy = min(doy_group[[g]]),
end_doy = max(doy_group[[g]]),
cloud_lim = cloud_lim,
item_name = item_name,
product = product,
order_name = order_name
)
if (!is.null(order_id)) {
order_df <- order_df %>%
bind_rows(data.frame(year = year_download, month = month_download, id = order_id))
}
}
}
print(paste(year_download, ", ", month_download))
}
dir.create(paste0(ps_path_site, "orders/"))
write_rds(order_df, paste0(ps_path_site, "orders/", "order_", year_download, ".rds"))
}
}
for (siteoi in site_list) {
ps_path_site <- paste0(ps_path, siteoi, "/")
for (year_download in 2017:2022) {
order_df <- read_rds(paste0(ps_path_site, "orders/", "order_", year_download, ".rds"))
cl <- makeCluster(nrow(order_df), outfile = "")
registerDoSNOW(cl)
foreach(
i = 1:nrow(order_df),
.packages = c("stringr", "planetR", "httr")
) %dopar% {
# Get order id
month_download <- order_df$month[i]
order_id <- order_df$id[i]
# Date range of interest
start_year <- year_download
end_year <- year_download
date_start <- lubridate::floor_date(as.Date(paste0(year_download, "-", str_pad(month_download, 2, pad = "0"), "-01")), unit = "month")
date_end <- lubridate::ceiling_date(as.Date(paste0(year_download, "-", str_pad(month_download, 2, pad = "0"), "-01")), unit = "month") - 1
start_doy <- as.numeric(format(date_start, "%j"))
end_doy <- as.numeric(format(date_end, "%j"))
# Set/Create Export Folder
order_name <- paste(siteoi, item_name, product, start_year, end_year, start_doy, end_doy, sep = "_")
exportfolder <- paste0(ps_path_site, order_name)
dir.create(exportfolder, recursive = T, showWarnings = F)
# Download
Sys.sleep(i * 0.5) # Otherwise sending request to API at the same time may cause error
planet_order_download_new(order_id, exportfolder, api_key = api_key, order_num = i, overwrite_opt = FALSE)
print(paste(year_download, ", ", month_download))
}
stopCluster(cl)
}
}
cl <- makeCluster(50, outfile = "")
registerDoSNOW(cl)
iscomplete <- F
while (!iscomplete) { # restart when there is error, usually because of cluster connection issues
iserror <- try(
for (taxaoi in taxa_list) {
taxaoi_short <- str_split(taxaoi, " ", simplify = T)[1]
for (s in 1:length(site_list)) {
# get plant locations
siteoi <- site_list[s]
plant_taxa_df <- plant_df %>%
filter(site == siteoi) %>%
filter(genus == taxaoi_short | family == taxaoi_short) %>%
mutate(id = row_number()) %>%
drop_na(lon, lat)
# skip when there are too few plants
if (taxaoi_short %in% c("Ambrosia", "Poaceae")) {
min_sample_size <- 1
} else {
min_sample_size <- 1
}
if (nrow(plant_taxa_df) >= min_sample_size) {
# plants as points
plant_taxa_sp <- SpatialPoints(plant_taxa_df[, c("lon", "lat")],
proj4string = CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
)
if (!file.exists(paste0(ps_path, "analyses/ps_", siteoi, "_", taxaoi_short, ".rds"))) {
# read reflectance data
files <- list.files(path = paste0(ps_path, siteoi), pattern = ".*_SR_clip.tif$", recursive = T, full.names = T) %>% sort()
nday <- length(files)
nloc <- length(plant_taxa_sp)
ps_mat <- foreach(
f = 1:nday,
.packages = c("raster"),
.combine = "rbind"
) %dopar% {
file <- files[f]
ps_st <- stack(file)
trees_sp_reproj <- spTransform(plant_taxa_sp, CRSobj = CRS(proj4string(ps_st)))
ps_values <- cbind(raster::extract(ps_st, trees_sp_reproj), f, id = 1:nloc)
print(paste0(f, " out of ", nday))
ps_values[complete.cases(ps_values), ]
}
# read quality assessment data
# 0 - fully usable data
# other - potentially problematic/unusable data
#
# Full description is in Planet's documentation (Page 91, Section 2. UNUSABLE DATA MASK FILE).
files <- list.files(path = paste0(ps_path, siteoi), pattern = ".*_udm_clip.tif$", recursive = T, full.names = T) %>% sort()
nday <- length(files)
nloc <- length(plant_taxa_sp)
ps_mask_mat <- foreach(
f = 1:nday,
.packages = c("raster"),
.combine = "rbind"
) %dopar% {
file <- files[f]
ps_ras <- raster(file)
trees_sp_reproj <- spTransform(plant_taxa_sp, CRSobj = CRS(proj4string(ps_ras)))
ps_values <- cbind(qa = raster::extract(ps_ras, trees_sp_reproj), f, id = 1:nloc)
print(paste0(f, " out of ", nday))
ps_values[complete.cases(ps_values), ]
}
colnames(ps_mask_mat) <- c("qa", "f", "id")
stopCluster(cl)
# get corresponding timing from file names
time_df <- list.files(path = paste0(ps_path, siteoi), pattern = ".*_SR_clip.tif$", recursive = T) %>%
sort() %>%
str_split(pattern = "/", simplify = T) %>%
data.frame() %>%
dplyr::select(filename = X2) %>%
rowwise() %>%
mutate(time = strptime(paste0(str_split(filename, pattern = "_")[[1]][1], str_split(filename, pattern = "_")[[1]][2]), format = "%Y%m%d%H%M%OS")) %>%
ungroup() %>%
mutate(f = row_number()) %>%
dplyr::select(-filename)
# assign id to each plant
coord_df <- coordinates(plant_taxa_sp) %>%
as_tibble() %>%
mutate(id = row_number())
# join data
ps_df <- ps_mat %>%
as_tibble() %>%
left_join(time_df, by = "f") %>%
left_join(coord_df, by = "id") %>%
mutate(
red = red * 0.0001, # scaling following Dixon et al's code
green = green * 0.0001,
blue = blue * 0.0001,
nir = nir * 0.0001
) %>%
# mutate(evi=2.5* (nir-red) / (nir + 6*red - 7.5*blue + 1),
# gndvi=(nir-green)/(nir+green),
# ebi= (red + green + blue) / (green / blue * (red - blue + 1))) %>%
left_join(ps_mask_mat %>% as_tibble(), by = c("id", "f")) %>%
dplyr::select(-f)
# save
write_rds(ps_df, paste0(ps_path, "analyses/ps_", siteoi, "_", taxaoi_short, ".rds"))
}
}
}
}
)
if (class(iserror) != "try-error") {
iscomplete <- T
} else if (class(iserror) == "try-error") { # restart cluster
iscomplete <- F
closeAllConnections()
cl <- makeCluster(50, outfile = "")
registerDoSNOW(cl)
}
}
stopCluster(cl)
Example time series.
site_vis <- "DT"
taxa_vis <- "Quercus"
set.seed(1)
read_rds(paste0(ps_path, "analyses/ps_", site_vis, "_", taxa_vis, ".rds")) %>%
filter(id %in% ((.) %>% pull(id) %>% sample(4))) %>% # four random trees
filter(qa == 0) %>%
gather(key = "band", value = "value", -id, -time, -lon, -lat, -qa) %>%
ggplot() +
geom_point(aes(x = time, y = value, col = band), alpha = 0.25) +
facet_wrap(. ~ id, ncol = 1) +
theme_classic() +
ggtitle(paste0("Taxa: ", taxa_vis, "; Site: ", site_vis))
Some settings.
# set color palette
cols <- c("EVI (PS)" = "dark green", "G2R (PS)" = "yellow green", "EBI (PS)" = "orange", "pollen count (NAB)" = "dark red", "flower observation (USA-NPN)" = "dark orchid", "flowering frequency" = "dark blue", "flower observation (DT)" = "coral")
# possible green up and green down thresholds
thres_list_up <- seq(from = 0, to = 1, by = 0.1) %>% round(1)
thres_list_down <- seq(from = 1, to = 0.0, by = -0.1) %>% round(1)
thres_df <- bind_rows(
data.frame(direction = "up", threshold = thres_list_up),
data.frame(direction = "down", threshold = thres_list_down)
)
# read in manually determined window of flowering
flower_window_df <- read_csv("/data/ZHULAB/phenology/nab/flower_window.csv")
Is there even a typical phenological curve? This is a function to rule out curves that are flat.
flat_better <- function(ts, doy = (274 - 365):(365 + 151), k = 50) {
fit_list <- vector(mode = "list")
# fit simple linear regression model
fit0 <- lm(ts ~ doy, data = data.frame(doy, ts))
fit_list[[1]] <- data.frame(AIC = AIC(fit0, k = k), model = "fit0")
# fit piecewise regression model to original model with different numbers of breakpoints
try( # sometimes fitting may fail
{
fit1 <- segmented(fit0, seg.Z = ~doy, npsi = 1, it = 10, control = seg.control(seed = 42, fix.npsi = T))
fit_list[[2]] <- data.frame(AIC = AIC(fit1, k = k), model = "fit1")
},
silent = T
)
try(
{
fit2 <- segmented(fit0, seg.Z = ~doy, npsi = 2, it = 10, control = seg.control(seed = 42, fix.npsi = T))
fit_list[[3]] <- data.frame(AIC = AIC(fit2, k = k), model = "fit2")
},
silent = T
)
try(
{
fit3 <- segmented(fit0, seg.Z = ~doy, npsi = 3, it = 10, control = seg.control(seed = 42, fix.npsi = T))
fit_list[[4]] <- data.frame(AIC = AIC(fit3, k = k), model = "fit3")
},
silent = T
)
# rank by AIC. k=50 sets a large penalty for more complex models
aic_df <- bind_rows(fit_list) %>%
arrange(AIC, model)
# return 1 when straight line is the best fit
better <- aic_df %>%
head(1) %>%
pull(model) == "fit0"
return(better)
}
Trees in detroit.
taxaoi <- "Quercus"
siteoi <- "DT"
yearoi <- 2017
taxaoi_short <- str_split(taxaoi, " ", simplify = T)[1]
flower_window <- seq(flower_window_df %>% filter(taxa == taxaoi) %>% pull(start),
flower_window_df %>% filter(taxa == taxaoi) %>% pull(end),
by = 1
)
thres_df_taxa <- thres_df %>% filter(direction == "up")
plant_taxa_df <- plant_df %>%
filter(site == siteoi) %>%
filter(genus == taxaoi_short | family == taxaoi_short) %>%
mutate(id = row_number()) %>%
drop_na(lon, lat)
# plant_df %>%
# filter(site==siteoi) %>%
# filter(genus==taxaoi_short|family==taxaoi_short) %>%
# group_by(species) %>%
# summarise(n=n()) %>%
# ungroup() %>%
# arrange(desc(n))
plant_taxa_df %>%
ggplot() +
geom_point(aes(x = lon, y = lat, col = species)) +
theme_classic()
Flowering observations.
detroit_df <- read_csv("/data/ZHULAB/phenology/occurrence/StreetTrees/Detroit_oak_pheno_obs_spring_2017.csv")
detroit_df_ts <- detroit_df %>%
left_join(detroit_df %>%
distinct(tree) %>%
mutate(id = row_number()),
by = "tree"
) %>%
left_join(trees_df %>% filter(site == "DT") %>% dplyr::select(id, lon, lat), by = c("tree" = "id")) %>% # trees_df uses "tree" as "id"
left_join(detroit_df %>%
group_by(tree) %>%
arrange(julian_day) %>%
summarize(
mindoy = min(julian_day),
# peak=findpeaks(flowering_interp, sortstr = T)[1,2] %>% as.numeric(),
thres = which(flowering_interp >= 95) %>% min() # first day that reaches 95% flowering
) %>%
mutate(peak = thres + mindoy - 1) %>% # correct with starting doy
ungroup() %>%
dplyr::select(tree, peak), by = c("tree")) %>%
arrange(peak) %>%
mutate(id = as.character(id)) %>%
mutate(date = as.Date("2017-01-01") + julian_day - 1) %>%
mutate(peak_date = as.Date("2017-01-01") + peak - 1)
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
detroit_df_ts %>%
filter(id %in% ((.) %>% distinct(id) %>% pull(id) %>% sample(6))) %>%
ggplot() +
geom_point(aes(x = date, y = flowering_raw), col = "coral") +
geom_line(aes(x = date, y = flowering_interp)) +
geom_vline(aes(xintercept = peak_date), col = "dark orchid") +
facet_wrap(. ~ id, ncol = 1) +
ylab("flowering status")+
theme_classic()
## Warning: Removed 292 rows containing missing values (geom_point).
View EVI, NAB, and NPN data together for one tree.
# preprocess ps data
ps_df <- read_rds(paste0(ps_path, "analyses/ps_", siteoi, "_", taxaoi_short, ".rds"))
ps_df_proc <- ps_df %>%
drop_na() %>%
mutate(date = as.Date(time)) %>%
mutate(
year = format(time, "%Y") %>% as.integer(),
doy = format(time, "%j") %>% as.integer(),
hour = format(strptime(time, "%Y-%m-%d %H:%M:%S"), "%H") %>% as.integer()
) %>%
filter(qa == 0) %>%
group_by(id, lon, lat, date, year, doy) %>%
summarise(
blue = mean(blue),
green = mean(green),
red = mean(red),
nir = mean(nir)
) %>%
ungroup() %>%
mutate(evi = 2.5 * (nir - red) / (nir + 6 * red - 7.5 * blue + 1)) %>%
filter(evi > 0, evi <= 1) %>%
filter(red > 0, green > 0, blue > 0)
# subset nab data
pollen_df <- nab_with_taxa_df %>%
left_join(meta_df %>% dplyr::select(id, site), by = "id") %>%
filter(site == siteoi) %>%
filter(genus == taxaoi_short | family == taxaoi_short)
# subset npn data
npn_df <- npn_df_all %>%
filter(site == siteoi) %>%
filter(taxa == taxaoi_short)
# join ps, nab, and npn data
ts_df <- ps_df_proc %>%
left_join(plant_taxa_df, by = c("id", "lon", "lat")) %>%
dplyr::select(id, date,
`EVI (PS)` = evi
) %>%
mutate(id = as.factor(id)) %>%
full_join(pollen_df %>%
dplyr::select(date, `pollen count (NAB)` = count) %>%
mutate(id = "pollen"),
by = c("date", "id")
) %>%
full_join(npn_df %>%
dplyr::select(date, `flower observation (USA-NPN)` = count) %>%
mutate(id = "npn"),
by = c("date", "id")
) %>%
arrange(id, date) %>%
mutate(doy = format(date, "%j") %>% as.numeric()) %>%
mutate(year = format(date, "%Y") %>% as.numeric())
# focus on one year, spanning from day 274 in the previous year to day 151 in the next year
ts_df_subset <- ts_df %>%
filter(doy != 366) %>%
# filter(doy>start_doy,doy<=end_doy) %>%
filter(year == yearoi | year == (yearoi - 1) | year == (yearoi + 1)) %>%
mutate(doy = ifelse(doy > 273 & year == yearoi - 1, doy - 365, doy)) %>%
mutate(year = ifelse(doy <= 0 & year == yearoi - 1, year + 1, year)) %>%
mutate(doy = ifelse(doy < 152 & year == yearoi + 1, doy + 365, doy)) %>%
mutate(year = ifelse(doy > 365 & year == yearoi + 1, year - 1, year)) %>%
filter(year == yearoi) %>%
gather(key = "var", value = "value", -date, -id, -doy, -year) %>%
mutate(var = fct_relevel(var, levels = c("EVI (PS)", "G2R (PS)", "EBI (PS)", "pollen count (NAB)", "flower observation (USA-NPN)"))) %>%
arrange(doy)
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Unknown levels in `f`: G2R (PS), EBI (PS)
# join with flowering data
ts_df_subset <- ts_df_subset %>%
bind_rows(detroit_df_ts %>% dplyr::select(id, date, lon, lat, value = flowering_raw, doy = julian_day) %>%
mutate(
var = "flower observation (DT)",
year = 2017
))
# summarize quantiles on the site level
ts_df_subset_summary <- ts_df_subset %>%
drop_na(value) %>%
group_by(date, var, doy, year) %>%
summarise(
q1 = quantile(value, 0.05, na.rm = T),
q2 = quantile(value, 0.5, na.rm = T),
q3 = quantile(value, 0.95, na.rm = T),
n = n()
) %>%
filter(n > 1) %>%
ungroup()
# visualize for one tree
idoi="1"
plant_sp<-plant_taxa_df %>% filter(id==idoi) %>% pull(species)
ts_df_subset %>%
filter(id == idoi | id == "pollen" | id == "npn") %>%
ggplot() +
geom_point(aes(x = date, y = value, col = var)) +
facet_wrap(. ~ var, ncol = 1, scales = "free_y") +
scale_color_manual(values = cols) +
ylab("")+
ggtitle(paste0("Site: ", siteoi," Year: ", yearoi," ID: ", idoi, " Species: ", plant_sp))+
theme_classic()
## Warning: Removed 1113 rows containing missing values (geom_point).
Detect greenup at multiple thresholds.
get_doy<-function (plant_df, thres_df, ts_df, idoi) {
plant_sp<-plant_df %>% filter(id==idoi) %>% pull(species)
px_doy<-ts_df %>% filter(id==idoi) %>% filter(var=="EVI (PS)") %>% pull(doy)
if (length(px_doy [px_doy>=1 & px_doy<=365])<50) {
flower_df_up<-flower_df_down<-NULL
print("too few data points")
} else {
px_evi<-ts_df %>% filter(id==idoi) %>% filter(var=="EVI (PS)") %>% pull(value)
px_evi_in<-approx(px_doy, px_evi, (274-365):(365+151), rule=1)$y
px_evi_in_sm<-whitfun(px_evi_in,30)
flatbetter<-flat_better(px_evi_in_sm, k=50)
### green down
thres_list_down<-thres_df %>% filter(direction=="down") %>% pull(threshold)
if (length(thres_list_down)==0) {
flower_df_down<-NULL
} else {
max_evi<-quantile(px_evi_in_sm[(-274+365+1):(-274+365+365+1)], 1, na.rm = T)
start_doy<-which(!is.na(px_evi_in_sm[(-274+365+1):(-274+365+365+1)])&px_evi_in_sm[(-274+365+1):(-274+365+365+1)]>=max_evi) %>% max()-274+365
min_evi<-quantile(px_evi_in_sm[start_doy:length(px_evi_in_sm)], 0, na.rm = T)
min_doy<-which(!is.na(px_evi_in_sm[start_doy:length(px_evi_in_sm)])& px_evi_in_sm[start_doy:length(px_evi_in_sm)]<=min_evi) %>% min()+start_doy
end_doy<-min_doy
param_ok2<- (end_doy>start_doy) & (!flatbetter)
if (!param_ok2) {
greendown_doy<-rep(NA, length(thres_list_down))
start_doy<-NA
end_doy<-NA
print("not typical growth curve")
} else {
greendown_thres<-(max_evi-min_evi)*thres_list_down+min_evi
greendown_doy<-rep(NA, length(greendown_thres))
for (t in 1:length(greendown_thres)) {
greendown_doy[t]<-which(px_evi_in_sm[start_doy:end_doy]<=greendown_thres[t]) %>% min()+start_doy
}
start_doy=start_doy+274-365
end_doy=end_doy+274-365
greendown_doy=greendown_doy+274-365
}
flower_df_down<-data.frame(id=idoi, start=start_doy, end=end_doy,direction="down", thres=thres_list_down, doy=greendown_doy)
}
### green up
thres_list_up<-thres_df %>% filter(direction=="up") %>% pull(threshold)
if (length(thres_list_up)==0) {
flower_df_up<-NULL
} else {
max_evi<-quantile(px_evi_in_sm[(-274+365+1):(-274+365+365+1)], 1, na.rm = T)
end_doy<-which(!is.na(px_evi_in_sm[(-274+365+1):(-274+365+365+1)])&px_evi_in_sm[(-274+365+1):(-274+365+365+1)]>=max_evi) %>% min()-274+365
min_evi<-quantile(px_evi_in_sm[1:end_doy], 0.00, na.rm = T)
min_doy<-which(!is.na(px_evi_in_sm[1:end_doy])& px_evi_in_sm[1:end_doy]<=min_evi) %>% max()
start_doy<-min_doy
param_ok2<- (end_doy>start_doy) & (!flatbetter)
if (!param_ok2) {
greenup_doy<-rep(NA, length(thres_list_up))
start_doy<-NA
end_doy<-NA
print("not typical growth curve")
} else {
greenup_thres<-(max_evi-min_evi)*thres_list_up+min_evi
greenup_doy<-rep(NA, length(greenup_thres))
for (t in 1:length(greenup_thres)) {
greenup_doy[t]<-which(px_evi_in_sm[start_doy:end_doy]>=greenup_thres[t]) %>% min()+start_doy
}
start_doy=start_doy+274-365
end_doy=end_doy+274-365
greenup_doy=greenup_doy+274-365
}
flower_df_up<-data.frame(id=idoi, start=start_doy, end=end_doy,direction="up", thres=thres_list_up, doy=greenup_doy)
}
}
flower_df<-bind_rows(flower_df_up, flower_df_down)
return (flower_df)
}
plot_tree<-function (plant_df, ts_df, flower_df, idoi) {
plant_sp<-plant_df %>% filter(id==idoi) %>% pull(species)
p_1tree<-ggplot( )+
geom_point(data=ts_df_subset %>% filter(id==idoi|id=="npn"|id=='pollen') %>%
filter(var %in% c("EVI (PS)","pollen count (NAB)", "flower observation (USA-NPN)", "flower observation (DT)")),
aes(doy, value,group=as.factor(id), col=var))+
theme_classic()+
guides(col=F,
alpha=F)+
scale_color_manual(values=cols)+
facet_wrap(.~var, ncol=1, scales = "free_y")+
xlab("day of year")+
ylab("")+
ggtitle(paste0("Site: ", siteoi," Year: ", yearoi," ID: ", idoi, " Species: ", plant_sp))
if (nrow(flower_df)==0) {
p_1tree
} else {
p_1tree<-p_1tree+
geom_vline(data=flower_df, aes(xintercept = start), col="red", alpha=0.8)+
geom_vline(data=flower_df, aes(xintercept = end), col="red", alpha=0.8)+
geom_vline(data=flower_df, aes(xintercept = doy), col="red", alpha=0.3)
}
return (p_1tree)
}
idoi <- "1"
flower_df <- get_doy(plant_taxa_df, thres_df_taxa,ts_df_subset, idoi)
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
p_1tree <- plot_tree(plant_taxa_df, ts_df_subset, flower_df, idoi)
p_1tree
## Warning: Removed 1113 rows containing missing values (geom_point).
Repeat for all trees to get frequency distribution of green-up dates.
# get green-up/green-down doy for each tree
random_id <- unique(plant_taxa_df$id)
flower_df_list <- vector(mode = "list", length = length(random_id))
for (i in 1:length(random_id)) {
blas_set_num_threads(1)
omp_set_num_threads(1)
idoi <- as.character(random_id)[i]
flower_df <- get_doy(plant_taxa_df, thres_df_taxa,ts_df_subset, idoi)
# print(paste0(i , " out of ", length(random_id)))
flower_df_list[[i]] <- flower_df
}
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## [1] "too few data points"
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## [1] "too few data points"
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(.): no non-missing arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values
## Warning in min(which(!is.na(x[(max_id + 1):length(x)]))): no non-missing
## arguments to min; returning Inf
# bind with species and site info
flower_doy_df <- bind_rows(flower_df_list) %>%
left_join(plant_taxa_df %>% dplyr::select(id, species, lat, lon) %>% mutate(id = as.character(id)), by = "id") %>%
mutate(site = siteoi, year = yearoi)
# summarize frequency on the site level
flower_freq_df_list <- vector(mode = "list", length = nrow(thres_df_taxa))
for (t in 1:nrow(thres_df_taxa)) {
flower_freq_df_list[[t]] <- flower_doy_df %>%
drop_na(start, end) %>%
filter(
direction == thres_df_taxa$direction[t],
thres == thres_df_taxa$threshold[t]
) %>%
group_by(doy, thres, direction) %>%
summarise(count = n()) %>%
ungroup() %>%
mutate(freq = count / n())
}
flower_freq_df <- bind_rows(flower_freq_df_list)
# join frequency distribution with EVI, NAB and NPN data
flower_freq_df <- flower_freq_df %>%
mutate(doy = factor(doy, levels = c((274 - 365):(365 + 151)))) %>%
complete(doy, thres, direction, fill = list(count = 0, freq = 0)) %>%
mutate(doy = doy %>% as.character() %>% as.numeric()) %>%
arrange(doy) %>%
full_join(ts_df_subset %>%
filter(id == "pollen") %>%
filter(var == "pollen count (NAB)") %>%
filter(year == yearoi) %>%
dplyr::select(doy, pollen = value),
by = "doy"
) %>%
full_join(ts_df_subset %>%
filter(id == "npn") %>%
filter(var == "flower observation (USA-NPN)") %>%
filter(year == yearoi) %>%
dplyr::select(doy, npn = value),
by = "doy"
) %>%
full_join(ts_df_subset_summary %>%
filter(var == "EVI (PS)") %>%
filter(year == yearoi) %>%
dplyr::select(doy, evi = q2),
by = "doy"
) %>%
mutate(doy = as.numeric(doy)) %>%
mutate(site = siteoi, year = yearoi) %>%
drop_na(doy, thres)
Plot green-up doy at several thresholds vs. flower observations. (Only showing within the limit of day 110 to 160.)
flower_doy_df %>%
filter(thres %in% c(0.4, 0.5, 0.6, 0.7, 0.8)) %>%
left_join(detroit_df_ts %>% dplyr::distinct(id, peak) %>% filter(is.finite(peak)), by = "id") %>%
ggplot() +
geom_jitter(aes(x = peak, y = doy, group = interaction(direction, thres), col = interaction(direction, thres)), alpha = 1) +
geom_smooth(aes(x = peak, y = doy, group = interaction(direction, thres), col = interaction(direction, thres)), method = "lm") +
geom_abline(slope = 1, intercept = 0, col = "red") +
ggpubr::stat_cor(
aes(x = peak, y = doy, group = interaction(direction, thres),
col = interaction(direction, thres),
label = paste(..r.label.., ..p.label.., sep = "*`,`~")),
p.accuracy = 0.05,
label.x.npc = 0.7,
label.y.npc = "top",
show.legend=F
)+
coord_equal() +
ylim(c(110, 160)) +
xlim(c(110, 160)) +
xlab ("flowering doy")+
ylab ("green-up doy")+
labs(col="direction and threshold")+
theme_classic()
## Warning: Removed 77 rows containing non-finite values (stat_smooth).
## Warning: Removed 77 rows containing non-finite values (stat_cor).
## Warning: Removed 81 rows containing missing values (geom_point).
Repeat for all years and check correlation. Here only showing green-up at 50%. There were significantly positive correlations in three out of five years, suggesting that green-up and flowering time may be correlated on the individual level.
read_rds("/raid/users/ysong67/GitHub/phenology/RS4flower/output/data/Quercus/flowering day of year.rds") %>%
filter(site==siteoi) %>%
bind_rows(flower_doy_df %>% mutate(year=yearoi)) %>%
filter(thres==0.5,
direction=="up") %>%
left_join(detroit_df_ts %>% dplyr::distinct(id, peak) %>% filter(is.finite(peak)), by="id") %>%
ggplot()+
geom_jitter(aes(x=peak, y=doy, group=as.factor(year), col=as.factor(year)), alpha=1)+
geom_smooth(aes(x=peak, y=doy, group=as.factor(year), col=as.factor(year)), method="lm")+
geom_abline(slope = 1, intercept = 0, col="red")+
ggpubr::stat_cor(
aes(x = peak, y = doy, group = as.factor(year),
col = as.factor(year),
label = paste(..r.label.., ..p.label.., sep = "*`,`~")),
p.accuracy = 0.05,
label.x.npc = 0.7,
label.y.npc = "top",
show.legend=F
)+
coord_equal()+
ylim(c(110,160 ))+
xlim(c(110,160 ))+
theme_classic()
## Warning: Removed 87 rows containing non-finite values (stat_smooth).
## Warning: Removed 87 rows containing non-finite values (stat_cor).
## Warning: Removed 89 rows containing missing values (geom_point).
Compare maps.
read_rds("/raid/users/ysong67/GitHub/phenology/RS4flower/output/data/Quercus/flowering day of year.rds") %>%
filter(site==siteoi) %>%
bind_rows(flower_doy_df %>% mutate(year=yearoi)) %>%
filter(thres==0.5,
direction=="up") %>%
dplyr::select(id, doy, lat, lon,year) %>%
mutate(year=as.factor(year)) %>%
bind_rows(detroit_df_ts %>% rename(doy=peak) %>% dplyr::distinct(id, doy, lat, lon) %>% filter(is.finite(doy)) %>% mutate(year="flower observation (DT)")) %>%
filter(doy<=160 & doy>=110) %>%
spread(key="year", value="doy") %>%
# drop_na() %>%
rowwise() %>%
mutate(lat=lat+rnorm(1, 0, 0.002),
lon=lon+rnorm(1, 0, 0.002)) %>%
ungroup() %>%
gather(key="group", value="doy", -id, -lat, -lon) %>%
group_by(group) %>%
mutate(doy=(doy-mean(doy, na.rm = T))/(quantile(doy, 0.95, na.rm = T)-quantile(doy, 0.05, na.rm = T))) %>% # standardize to between -0.5 and 0.5
filter(doy>=-0.5 & doy<=0.5) %>% # remove outliers
ungroup() %>%
ggplot()+
geom_point(aes(x=lon, y=lat, col=doy))+
facet_wrap(.~group, ncol=2)+
coord_equal()+
theme_classic()+
scale_color_viridis_c(direction = -1)
apply tp hls in dt
Conceptual figure.
conceptual1<-image_read("/raid/users/ysong67/GitHub/phenology/RS4flower/output/figures/conceptual1.jpg")
p_conceptual1<-image_ggplot(conceptual1)
conceptual2<-image_read("/raid/users/ysong67/GitHub/phenology/RS4flower/output/figures/conceptual1.jpg")
p_conceptual2<-image_ggplot(conceptual2)
grid.arrange(p_conceptual1, p_conceptual2, ncol=1)
cl <- makeCluster(50, outfile = "")
registerDoSNOW(cl)
for (taxaoi in taxa_list) {
taxaoi_short<-str_split(taxaoi, " ", simplify = T)[1]
flower_window<-seq(flower_window_df %>% filter(taxa==taxaoi) %>% pull(start),
flower_window_df %>% filter(taxa==taxaoi) %>% pull(end),
by=1)
if (taxaoi %in% c("Ambrosia", "Ulmus late")) {
thres_df_taxa<-thres_df %>% filter(direction=="down")
} else if (taxaoi== "Poaceae early" ) {
thres_df_taxa<-thres_df %>% filter(threshold>=0.5|direction=="up")
} else if (taxaoi== "Poaceae late" ) {
thres_df_taxa<-thres_df %>% filter(threshold>=0.5|direction=="down")
} else {
thres_df_taxa<-thres_df %>% filter(direction=="up")
}
flower_doy_df_siteyears_list<-flower_freq_df_siteyears_list<-vector(mode="list", length=length(site_list))
for (s in 1:length(site_list)) {
siteoi<-site_list[s]
plant_taxa_df<-plant_df %>%
filter(site==siteoi) %>%
filter(genus==taxaoi_short|family==taxaoi_short) %>%
mutate(id=row_number()) %>%
drop_na(lon, lat)
plant_df %>%
filter(site==siteoi) %>%
filter(genus==taxaoi_short|family==taxaoi_short) %>%
group_by(species) %>%
summarise(n=n()) %>%
ungroup() %>%
arrange(desc(n))
if (taxaoi %in% c("Ambrosia","Poaceae")) {
min_sample_size=10
} else {
min_sample_size=20
}
if (nrow(plant_taxa_df)>=min_sample_size) {
set.seed(1)
random_id<<-sample(plant_taxa_df$id, min(2000, length(unique(plant_taxa_df$id)))) %>% sort()
# preprocess ps data
ps_df<-read_rds(paste0(ps_path, "analyses/ps_",siteoi,"_",taxaoi_short,".rds"))
ps_df_proc<- ps_df%>%
drop_na() %>%
filter(id%in% random_id) %>%
mutate(date=as.Date(time)) %>%
mutate(year=format(time, "%Y") %>% as.integer(),
doy=format(time, "%j") %>% as.integer(),
hour=format(strptime(time,"%Y-%m-%d %H:%M:%S"),'%H') %>% as.integer()) %>%
filter(qa==0) %>%
group_by(id, lon, lat, date, year, doy ) %>%
summarise(blue=mean(blue),
green=mean(green),
red=mean(red),
nir=mean(nir)) %>%
ungroup() %>%
mutate(evi=2.5* (nir-red) / (nir + 6*red - 7.5*blue + 1)) %>%
filter(evi>0, evi<=1) %>%
filter(red>0, green>0, blue>0)
# subset nab data
pollen_df<- nab_with_taxa_df %>%
left_join(meta_df %>% dplyr::select(id, site), by="id") %>%
filter(site==siteoi) %>%
filter(genus==taxaoi_short|family==taxaoi_short)
# subset npn data
npn_df<-npn_df_all %>%
filter(site==siteoi) %>%
filter(taxa==taxaoi_short)
ts_df<-ps_df_proc %>%
left_join(plant_taxa_df, by=c("id", "lon", "lat")) %>%
dplyr::select(id, date, `EVI (PS)`=evi #,
# `G2R (PS)`=g2r, `EBI (PS)`=ebi
) %>%
mutate(id=as.factor(id)) %>%
full_join(pollen_df %>%
dplyr::select(date, `pollen count (NAB)`=count) %>%
mutate(id="pollen") ,
by=c("date", "id") ) %>%
full_join(npn_df %>%
dplyr::select(date, `flower observation (USA-NPN)`=count) %>%
mutate(id="npn") ,
by=c("date", "id") ) %>%
arrange(id, date) %>%
mutate(doy=format(date, "%j") %>% as.numeric()) %>%
mutate(year=format(date, "%Y") %>% as.numeric())
flower_doy_df_years_list<-flower_freq_df_years_list<-vector(mode="list", length=length(year_list))
for (y in 1:length(year_list)) {
yearoi<-year_list[y]
ts_df_subset<-ts_df %>%
filter(doy!=366) %>%
filter(year==yearoi|year==(yearoi-1)|year==(yearoi+1)) %>%
mutate(doy=ifelse(doy>273& year==yearoi-1, doy-365, doy)) %>%
mutate(year=ifelse(doy<=0 & year==yearoi-1, year+1, year)) %>%
mutate(doy=ifelse(doy<152& year==yearoi+1, doy+365, doy)) %>%
mutate(year=ifelse(doy>365 & year==yearoi+1, year-1, year)) %>%
filter(year==yearoi) %>%
gather(key="var", value="value", -date, -id, -doy, -year ) %>%
mutate(var=fct_relevel(var, levels=c( "EVI (PS)", "G2R (PS)","EBI (PS)", "pollen count (NAB)", "flower observation (USA-NPN)"))) %>%
mutate(alpha=case_when(var %in% c("EVI", "G2R")~0.05,
TRUE~1)) %>%
arrange(doy)
ts_df_subset_summary<- ts_df_subset%>%
drop_na(value) %>%
group_by(date, var, doy, year) %>%
summarise(q1=quantile(value, 0.05, na.rm=T),
q2=quantile(value, 0.5, na.rm=T),
q3=quantile(value, 0.95, na.rm=T),
n=n()) %>%
filter(n>1) %>%
ungroup()
flower_df_list<-
foreach (i = 1:length(random_id),
.packages = c("tidyverse", "ptw","greenbrown", "EnvCpt","segmented", "RhpcBLASctl")) %dopar% {
blas_set_num_threads(1)
omp_set_num_threads(1)
debug=F
idoi <- as.character(random_id)[i]
if (debug) {
set.seed(NULL)
idoi<-sample(random_id,1) %>% as.character()
}
flower_df<-get_doy(plant_taxa_df,thres_df_taxa, ts_df_subset, idoi)
if (debug) {
p_1tree<-plot_tree(plant_taxa_df, ts_df_subset, flower_df, idoi)
}
print(paste0(i , " out of ", length(random_id)))
flower_df
}
flower_doy_df<-bind_rows(flower_df_list) %>%
left_join(plant_taxa_df %>% dplyr::select(id, species, lat, lon) %>% mutate(id=as.character(id)), by="id") %>%
mutate(site=siteoi, year=yearoi)
flower_freq_df_list<-vector(mode="list", length=nrow(thres_df_taxa))
for (t in 1:nrow(thres_df_taxa)) {
flower_freq_df_list[[t]]<-flower_doy_df %>%
drop_na(start, end) %>%
filter(direction==thres_df_taxa$direction[t],
thres==thres_df_taxa$threshold[t]) %>%
group_by(doy, thres,direction) %>%
summarise(count=n()) %>%
ungroup() %>%
mutate(freq=count/n())
}
flower_freq_df<-bind_rows(flower_freq_df_list)
flower_freq_df<-flower_freq_df %>%
mutate(doy=factor(doy, levels=c((274-365):(365+151))) ) %>%
complete(doy, thres,direction,fill=list(count=0,freq=0)) %>%
mutate(doy=doy %>% as.character() %>% as.numeric()) %>%
arrange(doy) %>%
full_join(ts_df_subset %>%
filter(id=="pollen") %>%
filter(var=="pollen count (NAB)") %>%
filter(year==yearoi) %>%
dplyr::select( doy, pollen=value),
by="doy") %>%
full_join(ts_df_subset %>%
filter(id=="npn") %>%
filter(var=="flower observation (USA-NPN)") %>%
filter(year==yearoi) %>%
dplyr::select( doy, npn=value),
by="doy") %>%
full_join(ts_df_subset_summary %>%
filter(var=="EVI (PS)") %>%
filter(year==yearoi) %>%
dplyr::select( doy, evi=q2),
by="doy"
) %>%
full_join(ts_df_subset_summary %>%
filter(var=="G2R (PS)") %>%
filter(year==yearoi) %>%
dplyr::select( doy, g2r=q2),
by="doy"
) %>%
mutate(doy=as.numeric(doy)) %>%
mutate(site=siteoi, year=yearoi) %>%
drop_na(doy, thres)
print(paste0(siteoi, ", ", yearoi))
flower_doy_df_years_list[[y]]<-flower_doy_df
flower_freq_df_years_list[[y]]<-flower_freq_df
}
flower_doy_df_siteyears_list[[s]]<-bind_rows(flower_doy_df_years_list)
flower_freq_df_siteyears_list[[s]]<-bind_rows(flower_freq_df_years_list)
}
}
flower_doy_df<-bind_rows(flower_doy_df_siteyears_list) %>%
left_join(data.frame(site=site_list, sitename=sitename_list
# , region=region_list
), by=c("site"))
flower_freq_df<-bind_rows(flower_freq_df_siteyears_list) %>%
group_by(site, year,thres,direction) %>%
mutate(freq_sm=whit1(freq, 30)) %>%
mutate(pollen_in=na.approx(pollen, doy, na.rm=F, maxgap=14)) %>%
mutate(pollen_fill=replace_na(pollen_in, 0)) %>%
mutate(pollen_sm=whitfun(pollen_fill,30)) %>%
ungroup() %>%
dplyr::select(-pollen_in, -pollen_fill) %>%
mutate(pollen=case_when(doy %in% flower_window~ pollen)) %>%
mutate(pollen_sm=case_when(doy %in% flower_window~ pollen_sm,
TRUE~0)) %>%
mutate(npn=case_when(doy %in% flower_window~ npn))
# pollen climatology (historical mean of smoothed pollen count)
pollen_clim<-flower_freq_df %>%
group_by(site,direction, thres,doy) %>%
summarise(pollen_clim=mean(pollen_sm, na.rm=T))
flower_freq_df<-flower_freq_df%>%
left_join(pollen_clim, by=c("site", "direction","thres", "doy")) %>%
left_join(data.frame(site=site_list, sitename=sitename_list
# , region=region_list
), by=c("site"))
output_path<-paste0("/raid/users/ysong67/GitHub/phenology/RS4flower/output/data/", taxaoi,"/")
dir.create(output_path, recursive = T)
write_rds(flower_doy_df,paste0(output_path,"flowering day of year.rds" ))
write_rds(flower_freq_df,paste0(output_path,"flowering frequency.rds" ) )
}
stopCluster(cl)
cl <- makeCluster(50, outfile = "")
registerDoSNOW(cl)
for (taxaoi in taxa_list) {
taxaoi_short<-str_split(taxaoi, " ", simplify = T)[1]
flower_window<-seq(flower_window_df %>% filter(taxa==taxaoi) %>% pull(start),
flower_window_df %>% filter(taxa==taxaoi) %>% pull(end),
by=1)
if (taxaoi %in% c("Ambrosia", "Ulmus late")) {
thres_df_taxa<-thres_df %>% filter(direction=="down")
} else if (taxaoi== "Poaceae early" ) {
thres_df_taxa<-thres_df %>% filter(threshold>=0.5|direction=="up")
} else if (taxaoi== "Poaceae late" ) {
thres_df_taxa<-thres_df %>% filter(threshold>=0.5|direction=="down")
} else {
thres_df_taxa<-thres_df %>% filter(direction=="up")
}
output_path<-paste0("/raid/users/ysong67/GitHub/phenology/RS4flower/output/data/", taxaoi,"/")
# read in leafing phenology data
flower_doy_df<-read_rds(paste0(output_path,"flowering day of year.rds" ))
flower_freq_df<-read_rds(paste0(output_path,"flowering frequency.rds" ) )
# standardize all data to be between 0 and 1
flower_freq_df_standard<-flower_freq_df %>%
mutate(pollen=pollen %>% sqrt()) %>%
mutate(pollen_sm=pollen_sm %>% sqrt()) %>%
mutate(pollen_clim=pollen_clim %>% sqrt()) %>%
group_by(site, sitename, thres) %>%
mutate(freq=(freq-min(freq, na.rm = T))/(max(freq, na.rm = T)-min(freq, na.rm = T))) %>%
mutate(freq_sm=(freq_sm-min(freq_sm, na.rm = T))/(max(freq_sm, na.rm = T)-min(freq_sm, na.rm = T))) %>%
mutate(pollen=(pollen-min(pollen, na.rm = T))/(max(pollen, na.rm = T)-min(pollen, na.rm = T))) %>%
mutate(pollen_clim=(pollen_clim-min(pollen_sm, na.rm = T))/(max(pollen_sm, na.rm = T)-min(pollen_sm, na.rm = T))) %>%
mutate(pollen_sm=(pollen_sm-min(pollen_sm, na.rm = T))/(max(pollen_sm, na.rm = T)-min(pollen_sm, na.rm = T))) %>%
mutate(npn=(npn-min(npn, na.rm = T))/(max(npn, na.rm = T)-min(npn, na.rm = T))) %>%
mutate(evi=(evi-min(evi, na.rm = T))/(max(evi, na.rm = T)-min(evi, na.rm = T))) %>%
mutate(g2r=(g2r-min(g2r, na.rm = T))/(max(g2r, na.rm = T)-min(g2r, na.rm = T))) %>%
ungroup()
# optimize threshold and lag
flower_freq_df_check_list<-vector(mode="list", length=length(site_list))
for (r in 1:length(site_list)) { # select a region, here each site is treated as an individual region
regionoi<-site_list[r]
flower_freq_df_check_region_list<-vector(mode="list", length=nrow(thres_df_taxa))
for (t in 1:nrow(thres_df_taxa)) { # select a threshold
flower_freq_df_standard_subset<-flower_freq_df_standard %>%
filter(site==regionoi) %>%
filter(direction==thres_df_taxa$direction[t],
thres==thres_df_taxa$threshold[t]) %>%
group_by(site, year) %>%
ungroup()
sample_size<-flower_freq_df_standard_subset %>% filter(!is.na(pollen), pollen>0) %>% nrow()
if (sample_size>=5*4) { # only do tuning when there are more than 20 none-zero pollen count. skip the taxa and site otherwise.
pollen_sm_ts<- flower_freq_df_standard_subset %>% pull (pollen_sm) # smoothed pollen count, for tuning
pollen_ts<- flower_freq_df_standard_subset %>% pull (pollen) # pollen count, for calculating accuracy
pollen_clim_ts<- flower_freq_df_standard_subset %>% pull (pollen_clim) # climatology
rmse_clim<-sqrt(mean((pollen_clim_ts-pollen_ts)^2, na.rm=T)) # rmse between climatology and pollen count
lags_list<- -200:200 # possible lags to try
flower_freq_df_check_thres_list<-
foreach (l = 1:length(lags_list),
.packages = c("tidyverse")) %dopar% {
lag=lags_list[l]
if (lag<0) {
flower_freq_df_standard_subset_lag<- flower_freq_df_standard_subset %>% mutate(freq_sm=lead(freq_sm,n=-lag)) %>% mutate(freq_sm=replace_na(freq_sm, 0)) # shift leafing phenology earlier by "lag" days, fill the NA with 0
} else if (lag==0) {
flower_freq_df_standard_subset_lag<-flower_freq_df_standard_subset %>% mutate(freq_sm=replace_na(freq_sm, 0))
} else if (lag>0) {
flower_freq_df_standard_subset_lag<-flower_freq_df_standard_subset %>% mutate(freq_sm=lag(freq_sm,n=lag)) %>% mutate(freq_sm=replace_na(freq_sm, 0)) # shift leafing phenology later by "lag" days, fill the NA with 0
}
freq_ts_lag<- flower_freq_df_standard_subset_lag %>% pull (freq_sm)
rmse<-sqrt(mean((freq_ts_lag-pollen_sm_ts)^2, na.rm=T)) # rmse between remotely-sensed flowering phenology and smoothed pollen count
rmse_ps<-sqrt(mean((freq_ts_lag-pollen_ts)^2, na.rm=T)) # rmse between remotely-sensed flowering phenology and pollen count
print(paste0("region ", r, ", threshold ", t, ", lag ", l))
data.frame(direction=thres_df_taxa$direction[t],thres=thres_df_taxa$threshold[t],lag=lag, rmse=rmse,rmse_ps=rmse_ps, rmse_clim=rmse_clim)
}
flower_freq_df_check_region_list[[t]]<-bind_rows(flower_freq_df_check_thres_list) %>%
arrange(rmse) %>% # choose the lag giving the smallest rmse in the threshold
head(1)
} else {
flower_freq_df_check_region_list[[t]]<-data.frame(thres=numeric(0),lag=numeric(0), rmse=numeric(0), rmse_ps=numeric(0),rmse_clim=numeric(0))
}
}
flower_freq_df_check_list[[r]]<-bind_rows(flower_freq_df_check_region_list) %>%
mutate(region=regionoi) %>%
arrange((rmse))
}
flower_freq_df_check<-bind_rows(flower_freq_df_check_list)
# flower_freq_df_check
write_rds(flower_freq_df_check,paste0(output_path,"accuracy check.rds") )
best_thres<-flower_freq_df_check %>%
group_by(direction, thres) %>%
summarise(rmse=median(rmse)) %>% # median rmse for each threshold
arrange(rmse) %>%
head(1) %>% # keep threshold giving the smallest median rmse
dplyr::select(direction,thres)
# flower_freq_df_check %>% filter(direction==best_thres$direction[1],
# thres==best_thres$thres[1])
# getting time series with best threshold and lag
flower_freq_df_standard_best_list<-vector(mode="list", length=length(site_list))
for (r in 1:length(site_list)) {
regionoi<-site_list[r]
lagoi<-flower_freq_df_check %>% filter(direction==best_thres$direction,thres==best_thres$thres, region==regionoi) %>% pull(lag)
if (length(lagoi)>0) {
flower_freq_df_standard_best<-flower_freq_df_standard %>%
filter(site==regionoi) %>%
filter(direction==best_thres$direction,
thres == best_thres$thres) %>%
ungroup() %>%
group_by(site, year)
if (lagoi<0) {
flower_freq_df_standard_best<- flower_freq_df_standard_best %>% mutate(freq_sm=lead(freq_sm,n=-lagoi)) %>% mutate(freq_sm=replace_na(freq_sm, 0))
} else if (lagoi==0) {
flower_freq_df_standard_best<- flower_freq_df_standard_best %>% mutate(freq_sm=replace_na(freq_sm, 0))
} else if (lagoi>0) {
flower_freq_df_standard_best<- flower_freq_df_standard_best %>% mutate(freq_sm=lag(freq_sm,n=lagoi)) %>% mutate(freq_sm=replace_na(freq_sm, 0))
}
flower_freq_df_standard_best_list[[r]]<-flower_freq_df_standard_best %>% mutate(lag=lagoi)
}
}
flower_freq_df_standard_best<-bind_rows(flower_freq_df_standard_best_list)
# make plots
flower_freq_comp<-ggplot(flower_freq_df_standard_best)+
geom_point(aes(x=doy, y=npn, col="flower observation (USA-NPN)"), alpha=0.5)+
geom_point(aes(x=doy, y=pollen, col="pollen count (NAB)"))+
geom_line(aes(x=doy, y=pollen_clim, col="pollen count (NAB)"),alpha=0.5, lwd=1)+
geom_point(aes(x=doy, y=evi, col="EVI (PS)"), alpha=0.2)+
geom_line(aes(x=doy, y=freq_sm, col="flowering frequency"), lwd=1)+
theme_classic()+
facet_wrap(.~paste0(sitename, " (Lag: ", lag, ")")*year, ncol=4)+
scale_color_manual(values=cols)+
theme(legend.position="bottom")+
theme(legend.title = element_blank())+
ylab("")+
ggtitle(paste0("Taxa: ",taxaoi," (Threshold: ",best_thres$direction, " ",best_thres$thres, ")"))
flower_freq_comp
jpeg(paste0(output_path, "flower frequency compared with other data.jpg"),
height = 1600, width = 1600, res=150)
print(flower_freq_comp)
dev.off()
flower_freq_corr<-ggplot(flower_freq_df_standard_best %>% mutate(year=as.factor(year)))+
geom_point(aes(x=freq_sm, y=pollen , group=year, col=year))+
geom_smooth(aes(x=freq_sm, y=pollen , group=year, col=year), method="lm", se=F, lwd=0.5)+
geom_smooth(aes(x=freq_sm, y=pollen), method="lm")+
theme_classic()+
facet_wrap(.~sitename, ncol=4)+
coord_equal()+
xlim(0, 1)+
ylim(0, 1)+
ylab("pollen count^(1/2)")+
xlab("flowering frequency")
flower_freq_corr
jpeg(paste0(output_path, "flower frequency and pollen count correlation.jpg"),
height = 1200, width = 1600, res=150)
print(flower_freq_corr)
dev.off()
across_site_and_year<-ggplot(flower_freq_df_standard_best %>%
mutate(year=as.factor(year)))+
geom_point(aes(x=doy, y=pollen , group=year, col=year))+
geom_line(aes(x=doy, y=freq_sm, group=year, col=year))+
facet_wrap(.~paste0(sitename, " (Lag: ", lag, ")"), ncol=1)+
theme_classic()+
ylab("")+
ggtitle(paste0("Taxa: ",taxaoi," (Threshold: ",best_thres$direction, " ",best_thres$thres, ")"))
across_site_and_year
jpeg(paste0(output_path, "phenology across site and year.jpg"),
height = 1600, width = 1600, res=150)
print(across_site_and_year)
dev.off()
}
stopCluster(cl)
Display results in a Shiny app.
shinyApp(
ui = fluidPage(
selectInput("taxa", "Taxa:", choices = taxa_list),
selectInput("plot", "Plot:", choices = c("flower frequency compared with other data",
"phenology across site and year",
"flower frequency and pollen count correlation")),
plotOutput("plot")
),
server = function(input, output) {
img_file<-reactive(paste0("/raid/users/ysong67/GitHub/phenology/RS4flower/output/data/",input$taxa,"/",input$plot, ".jpg"))
output$plot = renderPlot({
image_ggplot(image_read(img_file()))
})
},
options = list(height = 500)
)
Compare accuracy with climatology.
flower_freq_df_check_list<-vector(mode="list")
for (taxaoi in taxa_list) {
output_path<-paste0("/raid/users/ysong67/GitHub/phenology/RS4flower/output/data/", taxaoi,"/")
flower_freq_df_check_list[[taxaoi]]<-read_rds(paste0(output_path,"accuracy check.rds")) %>%
mutate(taxa=taxaoi)
}
flower_freq_df_check<-bind_rows(flower_freq_df_check_list)
best_thres<-flower_freq_df_check %>% group_by(taxa, thres) %>% summarise(rmse=median(rmse)) %>% arrange(rmse) %>% slice(1) %>% dplyr::select(taxa, thres)
accuracy_df<-flower_freq_df_check %>%
right_join(best_thres, by=c("taxa", "thres")) %>%
left_join(meta_df %>% dplyr::select(site, sitename), by=c("region"="site"))
accuracy_df %>%
dplyr::select(taxa, site=region, sitename, direction, thres, lag)
## taxa site sitename direction thres lag
## 1 Quercus NY New York up 0.8 -18
## 2 Quercus SJ San Jose up 0.8 -11
## 3 Quercus AT Austin up 0.8 -18
## 4 Quercus ST Seattle up 0.8 -14
## 5 Quercus HT Houston up 0.8 -25
## 6 Quercus TP Tampa up 0.8 -43
## 7 Quercus DT Detroit up 0.8 -11
## 8 Quercus DV Denver up 0.8 -3
## 9 Quercus KC Kansas City up 0.8 -18
## 10 Quercus SL St. Louis up 0.8 -19
## 11 Cupressaceae NY New York up 0.3 -22
## 12 Cupressaceae SJ San Jose up 0.3 68
## 13 Cupressaceae AT Austin up 0.3 -79
## 14 Cupressaceae ST Seattle up 0.3 24
## 15 Cupressaceae HT Houston up 0.3 -46
## 16 Cupressaceae TP Tampa up 0.3 -35
## 17 Cupressaceae DV Denver up 0.3 6
## 18 Cupressaceae KC Kansas City up 0.3 -20
## 19 Cupressaceae SL St. Louis up 0.3 -16
## 20 Ambrosia AT Austin down 1.0 134
## 21 Ambrosia HT Houston down 1.0 126
## 22 Ambrosia TP Tampa down 1.0 37
## 23 Ambrosia DT Detroit down 1.0 38
## 24 Ambrosia DV Denver down 1.0 71
## 25 Ambrosia KC Kansas City down 1.0 68
## 26 Ambrosia SL St. Louis down 1.0 44
## 27 Morus NY New York up 0.8 -16
## 28 Morus SJ San Jose up 0.8 -23
## 29 Morus AT Austin up 0.8 -21
## 30 Morus HT Houston up 0.8 -29
## 31 Morus TP Tampa up 0.8 -48
## 32 Morus DV Denver up 0.8 -9
## 33 Morus KC Kansas City up 0.8 -11
## 34 Morus SL St. Louis up 0.8 -21
## 35 Pinaceae NY New York up 0.6 48
## 36 Pinaceae SJ San Jose up 0.6 83
## 37 Pinaceae ST Seattle up 0.6 19
## 38 Pinaceae HT Houston up 0.6 -20
## 39 Pinaceae TP Tampa up 0.6 -41
## 40 Pinaceae DV Denver up 0.6 33
## 41 Pinaceae SL St. Louis up 0.6 15
## 42 Ulmus early NY New York up 0.8 -51
## 43 Ulmus early SJ San Jose up 0.8 -15
## 44 Ulmus early AT Austin up 0.8 -65
## 45 Ulmus early ST Seattle up 0.8 -56
## 46 Ulmus early HT Houston up 0.8 -67
## 47 Ulmus early DV Denver up 0.8 -69
## 48 Ulmus early KC Kansas City up 0.8 -61
## 49 Ulmus early SL St. Louis up 0.8 -58
## 50 Ulmus late SJ San Jose down 0.6 13
## 51 Ulmus late AT Austin down 0.6 44
## 52 Ulmus late HT Houston down 0.6 80
## 53 Ulmus late KC Kansas City down 0.6 -30
## 54 Ulmus late SL St. Louis down 0.6 -19
## 55 Fraxinus SJ San Jose up 0.6 -20
## 56 Fraxinus AT Austin up 0.6 -40
## 57 Fraxinus ST Seattle up 0.6 -6
## 58 Fraxinus HT Houston up 0.6 -12
## 59 Fraxinus DV Denver up 0.6 -15
## 60 Fraxinus KC Kansas City up 0.6 -6
## 61 Fraxinus SL St. Louis up 0.6 -6
## 62 Betula NY New York up 0.8 -20
## 63 Betula SJ San Jose up 0.8 -11
## 64 Betula ST Seattle up 0.8 -32
## 65 Betula HT Houston up 0.8 -43
## 66 Betula DV Denver up 0.8 -12
## 67 Poaceae early NY New York up 0.3 41
## 68 Poaceae early SJ San Jose up 0.3 150
## 69 Poaceae early AT Austin up 0.3 56
## 70 Poaceae early ST Seattle up 0.3 126
## 71 Poaceae early HT Houston up 0.3 50
## 72 Poaceae early TP Tampa up 0.3 72
## 73 Poaceae early DT Detroit up 0.3 81
## 74 Poaceae early DV Denver up 0.3 75
## 75 Poaceae early KC Kansas City up 0.3 53
## 76 Poaceae early SL St. Louis up 0.3 47
## 77 Poaceae late NY New York down 0.3 -54
## 78 Poaceae late SJ San Jose down 0.3 85
## 79 Poaceae late AT Austin down 0.3 -54
## 80 Poaceae late HT Houston down 0.3 -41
## 81 Poaceae late TP Tampa down 0.3 -60
## 82 Poaceae late DT Detroit down 0.3 -73
## 83 Poaceae late DV Denver down 0.3 -61
## 84 Poaceae late KC Kansas City down 0.3 -84
## 85 Poaceae late SL St. Louis down 0.3 -60
## 86 Acer ST Seattle up 0.9 -30
## 87 Acer HT Houston up 0.9 -45
## 88 Acer DV Denver up 0.9 -68
## 89 Acer KC Kansas City up 0.9 -70
## 90 Acer SL St. Louis up 0.9 -77
## 91 Populus SJ San Jose up 0.7 -4
## 92 Populus ST Seattle up 0.7 -36
## 93 Populus HT Houston up 0.7 -18
## 94 Populus DV Denver up 0.7 -16
## 95 Populus KC Kansas City up 0.7 -22
ggplot(accuracy_df)+
geom_point(aes(x=region, y=rmse_ps), col="dark blue")+
geom_point(aes(x=region, y=rmse_clim), col="dark red")+
facet_wrap(.~taxa)+
ylab("RMSE")+
theme_classic()
accuracy_df %>%
mutate(rmse_diff=rmse_clim-rmse_ps) %>%
drop_na() %>%
group_by(taxa) %>%
summarize (min=min(rmse_diff),
max=max(rmse_diff),
mean=mean(rmse_diff),
median=median(rmse_diff),
proportion=sum(rmse_diff>0)/n()) %>%
ungroup()
## # A tibble: 13 x 6
## taxa min max mean median proportion
## * <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Acer -0.0423 0.0526 0.0215 0.0290 0.8
## 2 Ambrosia -0.0471 0.0192 -0.0237 -0.0302 0.143
## 3 Betula -0.0612 0.105 0.0157 0.00645 0.6
## 4 Cupressaceae -0.0338 0.0789 0.0111 0.00363 0.778
## 5 Fraxinus -0.0415 0.0872 0.0225 0.0249 0.714
## 6 Morus -0.0273 0.0318 0.000910 0.00344 0.5
## 7 Pinaceae -0.0710 0.0542 0.00610 0.0133 0.714
## 8 Poaceae early -0.0724 0.0645 0.0134 0.0186 0.6
## 9 Poaceae late -0.186 0.0507 -0.0357 -0.0274 0.333
## 10 Populus -0.107 0.0564 0.00583 0.0243 0.8
## 11 Quercus -0.0935 0.0639 -0.00322 -0.0108 0.4
## 12 Ulmus early -0.0500 0.0709 -0.00378 -0.000808 0.5
## 13 Ulmus late -0.0954 -0.0214 -0.0568 -0.0704 0
Plot correlation between mean annual temperature (MAT) and lag between green-up/down frequency and pollen count. * A positive lag means leafing phenology leads pollen phenology; a negative lag means leafing phenology lags pollen phenology.
# taxa in chronological order
taxa_chron<-c("Cupressaceae" , "Fraxinus","Ulmus early" ,"Pinaceae" ,"Acer", "Populus","Quercus", "Betula","Morus", "Poaceae early","Poaceae late" , "Ulmus late" , "Ambrosia" )
# data frame with flowering frequency and climate info, grouped into early and late taxa
lag_clim_df<-accuracy_df %>%
dplyr::select(-rmse, -rmse_ps, -rmse_clim) %>%
left_join(chelsa_df, by=c("region"="site")) %>%
mutate(taxa=as.factor(taxa)) %>%
mutate(taxa=fct_relevel(taxa, levels=taxa_chron))%>%
mutate(group=case_when(taxa %in% c("Ulmus late", "Poaceae late", "Ambrosia")~"late",
TRUE~"early"))
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
# plot lag vs. climate
ggplot(lag_clim_df)+
geom_point(aes(x=mat, y=lag, col=region, group=region))+
geom_smooth(aes(x=mat, y=lag), method="lm", se=F)+
geom_smooth(aes(x=mat, y=lag), method="lm", alpha=0.5)+
theme_classic()+
facet_wrap(.~taxa, scales = "free_y", ncol=5)+
xlab("Mean annual temperature (°C)")+
ylab ("Lag between leafing and pollen phenology (day)")
Linear regression to check significance of the correlation. When looking at individual taxa, only 3 were statistically significant.
# linear regression
reg.df<-lag_clim_df %>%
# filter(region!="SJ") %>%
group_by(taxa) %>%
do(broom::tidy(lm(lag ~ mat, .))) %>%
filter(term %in% c("mat")) %>%
dplyr::select( -statistic)
ggplot(reg.df %>% mutate(taxa=fct_relevel(taxa,levels=rev(taxa_chron))))+
geom_point(aes(x=taxa, y=estimate))+
geom_errorbar(aes(x=taxa, ymin=estimate-1.95*std.error, ymax=estimate+1.95*std.error))+
geom_hline(yintercept = 0, lty=2)+
geom_vline(xintercept = 3.5, lty=1)+
theme_classic()+
coord_flip()+
ylab("Slope (day/°C)")+
xlab ("Taxa")
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
summary(reg.df$p.value<0.05)
## Mode FALSE TRUE
## logical 10 3
Linear mixed-effect model with random slope and random intercept to summarize correlation over early and late flowering taxa. * For early-flowering taxa, pollen phenology usually leads leafing phenology. At warmer places, pollen leads leafing even more. This effect was statistically significant. * For early-flowering taxa, pollen phenology usually lags leafing phenology. At warmer places, pollen lags green-down even more. This effect was not statistically significant, possibly because there were only three taxa. It was significant for late-flowering elm trees.
lme.fit<-lme(lag ~ mat, random = ~ 1+mat|taxa, data=lag_clim_df %>% filter(group=="early"),control =lmeControl(opt='optim',optimMethod = "SANN") )
summary(lme.fit)
## Linear mixed-effects model fit by REML
## Data: lag_clim_df %>% filter(group == "early")
## AIC BIC logLik
## 713.3482 727.0082 -350.6741
##
## Random effects:
## Formula: ~1 + mat | taxa
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 47.6913779 (Intr)
## mat 0.9906993 -0.789
## Residual 24.0505312
##
## Fixed effects: lag ~ mat
## Value Std.Error DF t-value p-value
## (Intercept) 15.681029 17.830123 63 0.8794684 0.3825
## mat -2.000099 0.714202 63 -2.8004690 0.0068
## Correlation:
## (Intr)
## mat -0.749
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.2502434 -0.4759990 -0.1362737 0.2622050 3.3179205
##
## Number of Observations: 74
## Number of Groups: 10
lme.fit<-lme(lag ~ mat, random = ~ 1+mat|taxa, data=lag_clim_df %>% filter(group=="late"),control =lmeControl(opt='optim',optimMethod = "SANN"))
summary(lme.fit)
## Linear mixed-effects model fit by REML
## Data: lag_clim_df %>% filter(group == "late")
## AIC BIC logLik
## 224.0688 229.7355 -106.0344
##
## Random effects:
## Formula: ~1 + mat | taxa
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 45.91756 (Intr)
## mat 1.64489 0.228
## Residual 44.01958
##
## Fixed effects: lag ~ mat
## Value Std.Error DF t-value p-value
## (Intercept) -37.89290 43.67995 17 -0.8675126 0.3977
## mat 3.44892 2.36474 17 1.4584807 0.1629
## Correlation:
## (Intr)
## mat -0.642
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -1.3832425 -0.5402016 -0.2215449 0.2550384 2.8860598
##
## Number of Observations: 21
## Number of Groups: 3